Merge branch 'topic/core-cleanup' into for-linus
[safe/jmp/linux-2.6] / drivers / gpu / drm / radeon / r300_cmdbuf.c
index 702df45..c5c2742 100644 (file)
 
 #include "drmP.h"
 #include "drm.h"
+#include "drm_buffer.h"
 #include "radeon_drm.h"
 #include "radeon_drv.h"
 #include "r300_reg.h"
 
+#include <asm/unaligned.h>
+
 #define R300_SIMULTANEOUS_CLIPRECTS            4
 
 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
@@ -77,6 +80,9 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                                return -EFAULT;
                        }
 
+                       box.x2--; /* Hardware expects inclusive bottom-right corner */
+                       box.y2--;
+
                        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                                box.x1 = (box.x1) &
                                        R300_CLIPRECT_MASK;
@@ -95,8 +101,8 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
-
                        }
+
                        OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y1 << R300_CLIPRECT_Y_SHIFT));
                        OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
@@ -136,6 +142,18 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                ADVANCE_RING();
        }
 
+       /* flush cache and wait for idle-clean after cliprect change */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       ADVANCE_RING();
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
        return 0;
 }
 
@@ -166,13 +184,13 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(0x21DC, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
        ADD_RANGE(R300_VAP_CLIP_X_0, 4);
-       ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
+       ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
        ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
        ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
        ADD_RANGE(R300_GB_ENABLE, 1);
        ADD_RANGE(R300_GB_MSPOS0, 5);
-       ADD_RANGE(R300_TX_CNTL, 1);
+       ADD_RANGE(R300_TX_INVALTAGS, 1);
        ADD_RANGE(R300_TX_ENABLE, 1);
        ADD_RANGE(0x4200, 4);
        ADD_RANGE(0x4214, 1);
@@ -190,6 +208,10 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(0x42C0, 2);
        ADD_RANGE(R300_RS_CNTL_0, 2);
 
+       ADD_RANGE(R300_SU_REG_DEST, 1);
+       if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
+               ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
+
        ADD_RANGE(R300_SC_HYPERZ, 2);
        ADD_RANGE(0x43E8, 1);
 
@@ -215,6 +237,7 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
        ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
        ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
+       ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
 
        ADD_RANGE(R300_TX_FILTER_0, 16);
        ADD_RANGE(R300_TX_FILTER1_0, 16);
@@ -277,46 +300,42 @@ static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
        int reg;
        int sz;
        int i;
-       int values[64];
+       u32 *value;
        RING_LOCALS;
 
        sz = header.packet0.count;
        reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 
        if ((sz > 64) || (sz < 0)) {
-               DRM_ERROR
-                   ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
-                    reg, sz);
+               DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
+                        reg, sz);
                return -EINVAL;
        }
+
        for (i = 0; i < sz; i++) {
-               values[i] = ((int *)cmdbuf->buf)[i];
                switch (r300_reg_flags[(reg >> 2) + i]) {
                case MARK_SAFE:
                        break;
                case MARK_CHECK_OFFSET:
-                       if (!radeon_check_offset(dev_priv, (u32) values[i])) {
-                               DRM_ERROR
-                                   ("Offset failed range check (reg=%04x sz=%d)\n",
-                                    reg, sz);
+                       value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+                       if (!radeon_check_offset(dev_priv, *value)) {
+                               DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
+                                        reg, sz);
                                return -EINVAL;
                        }
                        break;
                default:
                        DRM_ERROR("Register %04x failed check as flag=%02x\n",
-                                 reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
+                               reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
                        return -EINVAL;
                }
        }
 
        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
-       OUT_RING_TABLE(values, sz);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
        ADVANCE_RING();
 
-       cmdbuf->buf += sz * 4;
-       cmdbuf->bufsz -= sz * 4;
-
        return 0;
 }
 
@@ -340,7 +359,7 @@ static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
        if (!sz)
                return 0;
 
-       if (sz * 4 > cmdbuf->bufsz)
+       if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
                return -EINVAL;
 
        if (reg + sz * 4 >= 0x10000) {
@@ -358,12 +377,9 @@ static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
 
        BEGIN_RING(1 + sz);
        OUT_RING(CP_PACKET0(reg, sz - 1));
-       OUT_RING_TABLE((int *)cmdbuf->buf, sz);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
        ADVANCE_RING();
 
-       cmdbuf->buf += sz * 4;
-       cmdbuf->bufsz -= sz * 4;
-
        return 0;
 }
 
@@ -385,22 +401,32 @@ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
 
        if (!sz)
                return 0;
-       if (sz * 16 > cmdbuf->bufsz)
+       if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
                return -EINVAL;
 
-       BEGIN_RING(5 + sz * 4);
-       /* Wait for VAP to come to senses.. */
-       /* there is no need to emit it multiple times, (only once before VAP is programmed,
-          but this optimization is for later */
-       OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
+       /* VAP is very sensitive so we purge cache before we program it
+        * and we also flush its state before & after */
+       BEGIN_RING(6);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
+       BEGIN_RING(3 + sz * 4);
        OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
        OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
-       OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
-
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
        ADVANCE_RING();
 
-       cmdbuf->buf += sz * 16;
-       cmdbuf->bufsz -= sz * 16;
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
 
        return 0;
 }
@@ -414,18 +440,24 @@ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
 {
        RING_LOCALS;
 
-       if (8 * 4 > cmdbuf->bufsz)
+       if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
                return -EINVAL;
 
        BEGIN_RING(10);
        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
        OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
                 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-       OUT_RING_TABLE((int *)cmdbuf->buf, 8);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
        ADVANCE_RING();
 
-       cmdbuf->buf += 8 * 4;
-       cmdbuf->bufsz -= 8 * 4;
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 
        return 0;
 }
@@ -436,28 +468,29 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
 {
        int count, i, k;
 #define MAX_ARRAY_PACKET  64
-       u32 payload[MAX_ARRAY_PACKET];
+       u32 *data;
        u32 narrays;
        RING_LOCALS;
 
-       count = (header >> 16) & 0x3fff;
+       count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 
        if ((count + 1) > MAX_ARRAY_PACKET) {
                DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
                          count);
                return -EINVAL;
        }
-       memset(payload, 0, MAX_ARRAY_PACKET * 4);
-       memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
-
        /* carefully check packet contents */
 
-       narrays = payload[0];
+       /* We have already read the header so advance the buffer. */
+       drm_buffer_advance(cmdbuf->buffer, 4);
+
+       narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
        k = 0;
        i = 1;
        while ((k < narrays) && (i < (count + 1))) {
                i++;            /* skip attribute field */
-               if (!radeon_check_offset(dev_priv, payload[i])) {
+               data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+               if (!radeon_check_offset(dev_priv, *data)) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
@@ -468,7 +501,8 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
                if (k == narrays)
                        break;
                /* have one more to process, they come in pairs */
-               if (!radeon_check_offset(dev_priv, payload[i])) {
+               data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+               if (!radeon_check_offset(dev_priv, *data)) {
                        DRM_ERROR
                            ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
                             k, i);
@@ -489,30 +523,30 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
 
        BEGIN_RING(count + 2);
        OUT_RING(header);
-       OUT_RING_TABLE(payload, count + 1);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
        ADVANCE_RING();
 
-       cmdbuf->buf += (count + 2) * 4;
-       cmdbuf->bufsz -= (count + 2) * 4;
-
        return 0;
 }
 
 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
                                             drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-       u32 *cmd = (u32 *) cmdbuf->buf;
+       u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
        int count, ret;
        RING_LOCALS;
 
-       count=(cmd[0]>>16) & 0x3fff;
 
-       if (cmd[0] & 0x8000) {
-               u32 offset;
+       count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 
-               if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
+       if (*cmd & 0x8000) {
+               u32 offset;
+               u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+               if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-                       offset = cmd[2] << 10;
+
+                       u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+                       offset = *cmd2 << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
@@ -520,9 +554,10 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
                        }
                }
 
-               if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
-                   (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-                       offset = cmd[3] << 10;
+               if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
+                   (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
+                       u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
+                       offset = *cmd3 << 10;
                        ret = !radeon_check_offset(dev_priv, offset);
                        if (ret) {
                                DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
@@ -533,42 +568,82 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
        }
 
        BEGIN_RING(count+2);
-       OUT_RING(cmd[0]);
-       OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
        ADVANCE_RING();
 
-       cmdbuf->buf += (count+2)*4;
-       cmdbuf->bufsz -= (count+2)*4;
-
        return 0;
 }
 
-static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
-                                            drm_radeon_kcmd_buffer_t *cmdbuf)
+static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
+                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-       u32 *cmd = (u32 *) cmdbuf->buf;
-       int count, ret;
+       u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+       u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+       int count;
+       int expected_count;
        RING_LOCALS;
 
-       count=(cmd[0]>>16) & 0x3fff;
+       count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 
-       if ((cmd[1] & 0x8000ffff) != 0x80000810) {
-               DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
-               return -EINVAL;
-       }
-       ret = !radeon_check_offset(dev_priv, cmd[2]);
-       if (ret) {
-               DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
+       expected_count = *cmd1 >> 16;
+       if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
+               expected_count = (expected_count+1)/2;
+
+       if (count && count != expected_count) {
+               DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
+                       count, expected_count);
                return -EINVAL;
        }
 
        BEGIN_RING(count+2);
-       OUT_RING(cmd[0]);
-       OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
        ADVANCE_RING();
 
-       cmdbuf->buf += (count+2)*4;
-       cmdbuf->bufsz -= (count+2)*4;
+       if (!count) {
+               drm_r300_cmd_header_t stack_header, *header;
+               u32 *cmd1, *cmd2, *cmd3;
+
+               if (drm_buffer_unprocessed(cmdbuf->buffer)
+                               < 4*4 + sizeof(stack_header)) {
+                       DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
+                       return -EINVAL;
+               }
+
+               header = drm_buffer_read_object(cmdbuf->buffer,
+                               sizeof(stack_header), &stack_header);
+
+               cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+               cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+               cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+               cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
+
+               if (header->header.cmd_type != R300_CMD_PACKET3 ||
+                   header->packet3.packet != R300_CMD_PACKET3_RAW ||
+                   *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
+                       DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
+                       return -EINVAL;
+               }
+
+               if ((*cmd1 & 0x8000ffff) != 0x80000810) {
+                       DRM_ERROR("Invalid indx_buffer reg address %08X\n",
+                                       *cmd1);
+                       return -EINVAL;
+               }
+               if (!radeon_check_offset(dev_priv, *cmd2)) {
+                       DRM_ERROR("Invalid indx_buffer offset is %08X\n",
+                                       *cmd2);
+                       return -EINVAL;
+               }
+               if (*cmd3 != expected_count) {
+                       DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
+                               *cmd3, expected_count);
+                       return -EINVAL;
+               }
+
+               BEGIN_RING(4);
+               OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
+               ADVANCE_RING();
+       }
 
        return 0;
 }
@@ -576,65 +651,72 @@ static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
                                            drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-       u32 header;
+       u32 *header;
        int count;
        RING_LOCALS;
 
-       if (4 > cmdbuf->bufsz)
+       if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
                return -EINVAL;
 
        /* Fixme !! This simply emits a packet without much checking.
           We need to be smarter. */
 
        /* obtain first word - actual packet3 header */
-       header = *(u32 *) cmdbuf->buf;
+       header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 
        /* Is it packet 3 ? */
-       if ((header >> 30) != 0x3) {
-               DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
+       if ((*header >> 30) != 0x3) {
+               DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
                return -EINVAL;
        }
 
-       count = (header >> 16) & 0x3fff;
+       count = (*header >> 16) & 0x3fff;
 
        /* Check again now that we know how much data to expect */
-       if ((count + 2) * 4 > cmdbuf->bufsz) {
+       if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
                DRM_ERROR
                    ("Expected packet3 of length %d but have only %d bytes left\n",
-                    (count + 2) * 4, cmdbuf->bufsz);
+                    (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
                return -EINVAL;
        }
 
        /* Is it a packet type we know about ? */
-       switch (header & 0xff00) {
+       switch (*header & 0xff00) {
        case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
-               return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
+               return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);
 
        case RADEON_CNTL_BITBLT_MULTI:
                return r300_emit_bitblt_multi(dev_priv, cmdbuf);
 
-       case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
-               return r300_emit_indx_buffer(dev_priv, cmdbuf);
-       case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
-       case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
-       case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
+       case RADEON_CP_INDX_BUFFER:
+               DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
+               return -EINVAL;
+       case RADEON_CP_3D_DRAW_IMMD_2:
+               /* triggers drawing using in-packet vertex data */
+       case RADEON_CP_3D_DRAW_VBUF_2:
+               /* triggers drawing of vertex buffers setup elsewhere */
+               dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+                                          RADEON_PURGE_EMITED);
+               break;
+       case RADEON_CP_3D_DRAW_INDX_2:
+               /* triggers drawing using indices to vertex buffer */
+               /* whenever we send vertices we clear the flush & purge flags */
+               dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+                                          RADEON_PURGE_EMITED);
+               return r300_emit_draw_indx_2(dev_priv, cmdbuf);
        case RADEON_WAIT_FOR_IDLE:
        case RADEON_CP_NOP:
                /* these packets are safe */
                break;
        default:
-               DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
+               DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
                return -EINVAL;
        }
 
        BEGIN_RING(count + 2);
-       OUT_RING(header);
-       OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
        ADVANCE_RING();
 
-       cmdbuf->buf += (count + 2) * 4;
-       cmdbuf->bufsz -= (count + 2) * 4;
-
        return 0;
 }
 
@@ -648,8 +730,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 {
        int n;
        int ret;
-       char *orig_buf = cmdbuf->buf;
-       int orig_bufsz = cmdbuf->bufsz;
+       int orig_iter = cmdbuf->buffer->iterator;
 
        /* This is a do-while-loop so that we run the interior at least once,
         * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
@@ -661,8 +742,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
                        if (ret)
                                return ret;
 
-                       cmdbuf->buf = orig_buf;
-                       cmdbuf->bufsz = orig_bufsz;
+                       cmdbuf->buffer->iterator = orig_iter;
                }
 
                switch (header.packet3.packet) {
@@ -685,9 +765,9 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
                        break;
 
                default:
-                       DRM_ERROR("bad packet3 type %i at %p\n",
+                       DRM_ERROR("bad packet3 type %i at byte %d\n",
                                  header.packet3.packet,
-                                 cmdbuf->buf - sizeof(header));
+                                 cmdbuf->buffer->iterator - (int)sizeof(header));
                        return -EINVAL;
                }
 
@@ -713,17 +793,53 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
  */
 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
 {
+       uint32_t cache_z, cache_3d, cache_2d;
        RING_LOCALS;
 
-       BEGIN_RING(6);
-       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-       OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+       cache_z = R300_ZC_FLUSH;
+       cache_2d = R300_RB2D_DC_FLUSH;
+       cache_3d = R300_RB3D_DC_FLUSH;
+       if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
+               /* we can purge: primitives were drawn since the last purge */
+               cache_z |= R300_ZC_FREE;
+               cache_2d |= R300_RB2D_DC_FREE;
+               cache_3d |= R300_RB3D_DC_FREE;
+       }
+
+       /* flush & purge zbuffer */
+       BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
-       OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE|
-                R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-       OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
-       OUT_RING(0x0);
+       OUT_RING(cache_z);
+       ADVANCE_RING();
+       /* flush & purge 3d */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(cache_3d);
+       ADVANCE_RING();
+       /* flush & purge texture */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
+       /* FIXME: is this one really needed ? */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* flush & purge 2d through E2 as RB2D will trigger lockup */
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(cache_2d);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
+                RADEON_WAIT_HOST_IDLECLEAN);
        ADVANCE_RING();
+       /* set flush & purge flags */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
 }
 
 /**
@@ -731,12 +847,12 @@ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
  * be careful about how this function is called.
  */
-static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
+static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
-       drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
+       struct drm_radeon_master_private *master_priv = master->driver_priv;
 
-       buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
+       buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
        buf->pending = 1;
        buf->used = 0;
 }
@@ -787,11 +903,13 @@ static int r300_scratch(drm_radeon_private_t *dev_priv,
                        drm_r300_cmd_header_t header)
 {
        u32 *ref_age_base;
-       u32 i, buf_idx, h_pending;
+       u32 i, *buf_idx, h_pending;
+       u64 *ptr_addr;
+       u64 stack_ptr_addr;
        RING_LOCALS;
 
-       if (cmdbuf->bufsz <
-           (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
+       if (drm_buffer_unprocessed(cmdbuf->buffer) <
+           (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
                return -EINVAL;
        }
 
@@ -801,35 +919,35 @@ static int r300_scratch(drm_radeon_private_t *dev_priv,
 
        dev_priv->scratch_ages[header.scratch.reg]++;
 
-       ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
-
-       cmdbuf->buf += sizeof(u64);
-       cmdbuf->bufsz -= sizeof(u64);
+       ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
+                       sizeof(stack_ptr_addr), &stack_ptr_addr);
+       ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);
 
        for (i=0; i < header.scratch.n_bufs; i++) {
-               buf_idx = *(u32 *)cmdbuf->buf;
-               buf_idx *= 2; /* 8 bytes per buf */
+               buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+               *buf_idx *= 2; /* 8 bytes per buf */
 
-               if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
+               if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
+                               &dev_priv->scratch_ages[header.scratch.reg],
+                               sizeof(u32)))
                        return -EINVAL;
-               }
 
-               if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
+               if (DRM_COPY_FROM_USER(&h_pending,
+                               ref_age_base + *buf_idx + 1,
+                               sizeof(u32)))
                        return -EINVAL;
-               }
 
-               if (h_pending == 0) {
+               if (h_pending == 0)
                        return -EINVAL;
-               }
 
                h_pending--;
 
-               if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
+               if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
+                                       &h_pending,
+                                       sizeof(u32)))
                        return -EINVAL;
-               }
 
-               cmdbuf->buf += sizeof(buf_idx);
-               cmdbuf->bufsz -= sizeof(buf_idx);
+               drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
        }
 
        BEGIN_RING(2);
@@ -852,7 +970,7 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
        int sz;
        int addr;
        int type;
-       int clamp;
+       int isclamp;
        int stride;
        RING_LOCALS;
 
@@ -861,29 +979,26 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
        addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
 
        type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
-       clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+       isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
 
        addr |= (type << 16);
-       addr |= (clamp << 17);
+       addr |= (isclamp << 17);
 
        stride = type ? 4 : 6;
 
        DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
        if (!sz)
                return 0;
-       if (sz * stride * 4 > cmdbuf->bufsz)
+       if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
                return -EINVAL;
 
        BEGIN_RING(3 + sz * stride);
        OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
        OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
-       OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
+       OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
 
        ADVANCE_RING();
 
-       cmdbuf->buf += sz * stride * 4;
-       cmdbuf->bufsz -= sz * stride * 4;
-
        return 0;
 }
 
@@ -898,6 +1013,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                      drm_radeon_kcmd_buffer_t *cmdbuf)
 {
        drm_radeon_private_t *dev_priv = dev->dev_private;
+       struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
        struct drm_device_dma *dma = dev->dma;
        struct drm_buf *buf = NULL;
        int emit_dispatch_age = 0;
@@ -905,8 +1021,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
        DRM_DEBUG("\n");
 
-       /* See the comment above r300_emit_begin3d for why this call must be here,
-        * and what the cleanup gotos are for. */
+       /* pacify */
        r300_pacify(dev_priv);
 
        if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
@@ -915,19 +1030,18 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                        goto cleanup;
        }
 
-       while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
+       while (drm_buffer_unprocessed(cmdbuf->buffer)
+                       >= sizeof(drm_r300_cmd_header_t)) {
                int idx;
-               drm_r300_cmd_header_t header;
-
-               header.u = *(unsigned int *)cmdbuf->buf;
+               drm_r300_cmd_header_t *header, stack_header;
 
-               cmdbuf->buf += sizeof(header);
-               cmdbuf->bufsz -= sizeof(header);
+               header = drm_buffer_read_object(cmdbuf->buffer,
+                               sizeof(stack_header), &stack_header);
 
-               switch (header.header.cmd_type) {
+               switch (header->header.cmd_type) {
                case R300_CMD_PACKET0:
                        DRM_DEBUG("R300_CMD_PACKET0\n");
-                       ret = r300_emit_packet0(dev_priv, cmdbuf, header);
+                       ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet0 failed\n");
                                goto cleanup;
@@ -936,7 +1050,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
                case R300_CMD_VPU:
                        DRM_DEBUG("R300_CMD_VPU\n");
-                       ret = r300_emit_vpu(dev_priv, cmdbuf, header);
+                       ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
                        if (ret) {
                                DRM_ERROR("r300_emit_vpu failed\n");
                                goto cleanup;
@@ -945,7 +1059,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
                case R300_CMD_PACKET3:
                        DRM_DEBUG("R300_CMD_PACKET3\n");
-                       ret = r300_emit_packet3(dev_priv, cmdbuf, header);
+                       ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
                        if (ret) {
                                DRM_ERROR("r300_emit_packet3 failed\n");
                                goto cleanup;
@@ -979,8 +1093,8 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                                int i;
                                RING_LOCALS;
 
-                               BEGIN_RING(header.delay.count);
-                               for (i = 0; i < header.delay.count; i++)
+                               BEGIN_RING(header->delay.count);
+                               for (i = 0; i < header->delay.count; i++)
                                        OUT_RING(RADEON_CP_PACKET2);
                                ADVANCE_RING();
                        }
@@ -988,7 +1102,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
                case R300_CMD_DMA_DISCARD:
                        DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
-                       idx = header.dma.buf_idx;
+                       idx = header->dma.buf_idx;
                        if (idx < 0 || idx >= dma->buf_count) {
                                DRM_ERROR("buffer index %d (of %d max)\n",
                                          idx, dma->buf_count - 1);
@@ -1006,17 +1120,17 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                        }
 
                        emit_dispatch_age = 1;
-                       r300_discard_buffer(dev, buf);
+                       r300_discard_buffer(dev, file_priv->master, buf);
                        break;
 
                case R300_CMD_WAIT:
                        DRM_DEBUG("R300_CMD_WAIT\n");
-                       r300_cmd_wait(dev_priv, header);
+                       r300_cmd_wait(dev_priv, *header);
                        break;
 
                case R300_CMD_SCRATCH:
                        DRM_DEBUG("R300_CMD_SCRATCH\n");
-                       ret = r300_scratch(dev_priv, cmdbuf, header);
+                       ret = r300_scratch(dev_priv, cmdbuf, *header);
                        if (ret) {
                                DRM_ERROR("r300_scratch failed\n");
                                goto cleanup;
@@ -1030,16 +1144,16 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                                goto cleanup;
                        }
                        DRM_DEBUG("R300_CMD_R500FP\n");
-                       ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
+                       ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
                        if (ret) {
                                DRM_ERROR("r300_emit_r500fp failed\n");
                                goto cleanup;
                        }
                        break;
                default:
-                       DRM_ERROR("bad cmd_type %i at %p\n",
-                                 header.header.cmd_type,
-                                 cmdbuf->buf - sizeof(header));
+                       DRM_ERROR("bad cmd_type %i at byte %d\n",
+                                 header->header.cmd_type,
+                                 cmdbuf->buffer->iterator - (int)sizeof(*header));
                        ret = -EINVAL;
                        goto cleanup;
                }
@@ -1061,7 +1175,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
                /* Emit the vertex buffer age */
                BEGIN_RING(2);
-               RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
+               RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
                ADVANCE_RING();
        }