Merge branch 'classmate' into release
[safe/jmp/linux-2.6] / drivers / gpu / drm / radeon / r300_cmdbuf.c
index 702df45..34bffa0 100644 (file)
@@ -37,6 +37,8 @@
 #include "radeon_drv.h"
 #include "r300_reg.h"
 
+#include <asm/unaligned.h>
+
 #define R300_SIMULTANEOUS_CLIPRECTS            4
 
 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
@@ -77,6 +79,9 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                                return -EFAULT;
                        }
 
+                       box.x2--; /* Hardware expects inclusive bottom-right corner */
+                       box.y2--;
+
                        if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
                                box.x1 = (box.x1) &
                                        R300_CLIPRECT_MASK;
@@ -95,8 +100,8 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                                        R300_CLIPRECT_MASK;
                                box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
                                        R300_CLIPRECT_MASK;
-
                        }
+
                        OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
                                 (box.y1 << R300_CLIPRECT_Y_SHIFT));
                        OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
@@ -136,6 +141,18 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
                ADVANCE_RING();
        }
 
+       /* flush cache and wait idle clean after cliprect change */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       ADVANCE_RING();
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
        return 0;
 }
 
@@ -166,13 +183,13 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(0x21DC, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
        ADD_RANGE(R300_VAP_CLIP_X_0, 4);
-       ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
+       ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
        ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
        ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
        ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
        ADD_RANGE(R300_GB_ENABLE, 1);
        ADD_RANGE(R300_GB_MSPOS0, 5);
-       ADD_RANGE(R300_TX_CNTL, 1);
+       ADD_RANGE(R300_TX_INVALTAGS, 1);
        ADD_RANGE(R300_TX_ENABLE, 1);
        ADD_RANGE(0x4200, 4);
        ADD_RANGE(0x4214, 1);
@@ -190,6 +207,10 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(0x42C0, 2);
        ADD_RANGE(R300_RS_CNTL_0, 2);
 
+       ADD_RANGE(R300_SU_REG_DEST, 1);
+       if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
+               ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
+
        ADD_RANGE(R300_SC_HYPERZ, 2);
        ADD_RANGE(0x43E8, 1);
 
@@ -215,6 +236,7 @@ void r300_init_reg_flags(struct drm_device *dev)
        ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
        ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
        ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
+       ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
 
        ADD_RANGE(R300_TX_FILTER_0, 16);
        ADD_RANGE(R300_TX_FILTER1_0, 16);
@@ -388,15 +410,28 @@ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
        if (sz * 16 > cmdbuf->bufsz)
                return -EINVAL;
 
-       BEGIN_RING(5 + sz * 4);
-       /* Wait for VAP to come to senses.. */
-       /* there is no need to emit it multiple times, (only once before VAP is programmed,
-          but this optimization is for later */
-       OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
+       /* VAP is very sensitive so we purge cache before we program it
+        * and we also flush its state before & after */
+       BEGIN_RING(6);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
+       BEGIN_RING(3 + sz * 4);
        OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
        OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
        OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
+       ADVANCE_RING();
 
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+       OUT_RING(0);
        ADVANCE_RING();
 
        cmdbuf->buf += sz * 16;
@@ -424,6 +459,15 @@ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
        OUT_RING_TABLE((int *)cmdbuf->buf, 8);
        ADVANCE_RING();
 
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(R300_RB3D_DC_FLUSH);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* set flush flag */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
        cmdbuf->buf += 8 * 4;
        cmdbuf->bufsz -= 8 * 4;
 
@@ -543,22 +587,23 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
        return 0;
 }
 
-static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
-                                            drm_radeon_kcmd_buffer_t *cmdbuf)
+static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
+                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-       u32 *cmd = (u32 *) cmdbuf->buf;
-       int count, ret;
+       u32 *cmd;
+       int count;
+       int expected_count;
        RING_LOCALS;
 
-       count=(cmd[0]>>16) & 0x3fff;
+       cmd = (u32 *) cmdbuf->buf;
+       count = (cmd[0]>>16) & 0x3fff;
+       expected_count = cmd[1] >> 16;
+       if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
+               expected_count = (expected_count+1)/2;
 
-       if ((cmd[1] & 0x8000ffff) != 0x80000810) {
-               DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
-               return -EINVAL;
-       }
-       ret = !radeon_check_offset(dev_priv, cmd[2]);
-       if (ret) {
-               DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
+       if (count && count != expected_count) {
+               DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
+                       count, expected_count);
                return -EINVAL;
        }
 
@@ -570,6 +615,50 @@ static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
        cmdbuf->buf += (count+2)*4;
        cmdbuf->bufsz -= (count+2)*4;
 
+       if (!count) {
+               drm_r300_cmd_header_t header;
+
+               if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
+                       DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
+                       return -EINVAL;
+               }
+
+               header.u = *(unsigned int *)cmdbuf->buf;
+
+               cmdbuf->buf += sizeof(header);
+               cmdbuf->bufsz -= sizeof(header);
+               cmd = (u32 *) cmdbuf->buf;
+
+               if (header.header.cmd_type != R300_CMD_PACKET3 ||
+                   header.packet3.packet != R300_CMD_PACKET3_RAW ||
+                   cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
+                       DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
+                       return -EINVAL;
+               }
+
+               if ((cmd[1] & 0x8000ffff) != 0x80000810) {
+                       DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
+                       return -EINVAL;
+               }
+               if (!radeon_check_offset(dev_priv, cmd[2])) {
+                       DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
+                       return -EINVAL;
+               }
+               if (cmd[3] != expected_count) {
+                       DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
+                               cmd[3], expected_count);
+                       return -EINVAL;
+               }
+
+               BEGIN_RING(4);
+               OUT_RING(cmd[0]);
+               OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
+               ADVANCE_RING();
+
+               cmdbuf->buf += 4*4;
+               cmdbuf->bufsz -= 4*4;
+       }
+
        return 0;
 }
 
@@ -613,11 +702,22 @@ static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
        case RADEON_CNTL_BITBLT_MULTI:
                return r300_emit_bitblt_multi(dev_priv, cmdbuf);
 
-       case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
-               return r300_emit_indx_buffer(dev_priv, cmdbuf);
-       case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
-       case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
-       case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
+       case RADEON_CP_INDX_BUFFER:
+               DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
+               return -EINVAL;
+       case RADEON_CP_3D_DRAW_IMMD_2:
+               /* triggers drawing using in-packet vertex data */
+       case RADEON_CP_3D_DRAW_VBUF_2:
+               /* triggers drawing of vertex buffers setup elsewhere */
+               dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+                                          RADEON_PURGE_EMITED);
+               break;
+       case RADEON_CP_3D_DRAW_INDX_2:
+               /* triggers drawing using indices to vertex buffer */
+               /* whenever we send vertex we clear flush & purge */
+               dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+                                          RADEON_PURGE_EMITED);
+               return r300_emit_draw_indx_2(dev_priv, cmdbuf);
        case RADEON_WAIT_FOR_IDLE:
        case RADEON_CP_NOP:
                /* these packets are safe */
@@ -713,17 +813,53 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
  */
 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
 {
+       uint32_t cache_z, cache_3d, cache_2d;
        RING_LOCALS;
 
-       BEGIN_RING(6);
-       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
-       OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+       cache_z = R300_ZC_FLUSH;
+       cache_2d = R300_RB2D_DC_FLUSH;
+       cache_3d = R300_RB3D_DC_FLUSH;
+       if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
+               /* we can purge, primitives were drawn since last purge */
+               cache_z |= R300_ZC_FREE;
+               cache_2d |= R300_RB2D_DC_FREE;
+               cache_3d |= R300_RB3D_DC_FREE;
+       }
+
+       /* flush & purge zbuffer */
+       BEGIN_RING(2);
        OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
-       OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE|
-                R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-       OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
-       OUT_RING(0x0);
+       OUT_RING(cache_z);
+       ADVANCE_RING();
+       /* flush & purge 3d */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(cache_3d);
+       ADVANCE_RING();
+       /* flush & purge texture */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
+       OUT_RING(0);
+       ADVANCE_RING();
+       /* FIXME: is this one really needed ? */
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
+       OUT_RING(0);
        ADVANCE_RING();
+       BEGIN_RING(2);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+       ADVANCE_RING();
+       /* flush & purge 2d through E2 as RB2D will trigger lockup */
+       BEGIN_RING(4);
+       OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
+       OUT_RING(cache_2d);
+       OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+       OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
+                RADEON_WAIT_HOST_IDLECLEAN);
+       ADVANCE_RING();
+       /* set flush & purge flags */
+       dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
 }
 
 /**
@@ -731,12 +867,12 @@ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
  * be careful about how this function is called.
  */
-static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
+static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 {
-       drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
+       struct drm_radeon_master_private *master_priv = master->driver_priv;
 
-       buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
+       buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
        buf->pending = 1;
        buf->used = 0;
 }
@@ -788,6 +924,7 @@ static int r300_scratch(drm_radeon_private_t *dev_priv,
 {
        u32 *ref_age_base;
        u32 i, buf_idx, h_pending;
+       u64 ptr_addr;
        RING_LOCALS;
 
        if (cmdbuf->bufsz <
@@ -801,7 +938,8 @@ static int r300_scratch(drm_radeon_private_t *dev_priv,
 
        dev_priv->scratch_ages[header.scratch.reg]++;
 
-       ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
+       ptr_addr = get_unaligned((u64 *)cmdbuf->buf);
+       ref_age_base = (u32 *)(unsigned long)ptr_addr;
 
        cmdbuf->buf += sizeof(u64);
        cmdbuf->bufsz -= sizeof(u64);
@@ -852,7 +990,7 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
        int sz;
        int addr;
        int type;
-       int clamp;
+       int isclamp;
        int stride;
        RING_LOCALS;
 
@@ -861,10 +999,10 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
        addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
 
        type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
-       clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+       isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
 
        addr |= (type << 16);
-       addr |= (clamp << 17);
+       addr |= (isclamp << 17);
 
        stride = type ? 4 : 6;
 
@@ -898,6 +1036,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                      drm_radeon_kcmd_buffer_t *cmdbuf)
 {
        drm_radeon_private_t *dev_priv = dev->dev_private;
+       struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
        struct drm_device_dma *dma = dev->dma;
        struct drm_buf *buf = NULL;
        int emit_dispatch_age = 0;
@@ -905,8 +1044,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
        DRM_DEBUG("\n");
 
-       /* See the comment above r300_emit_begin3d for why this call must be here,
-        * and what the cleanup gotos are for. */
+       /* pacify */
        r300_pacify(dev_priv);
 
        if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
@@ -1006,7 +1144,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
                        }
 
                        emit_dispatch_age = 1;
-                       r300_discard_buffer(dev, buf);
+                       r300_discard_buffer(dev, file_priv->master, buf);
                        break;
 
                case R300_CMD_WAIT:
@@ -1061,7 +1199,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
 
                /* Emit the vertex buffer age */
                BEGIN_RING(2);
-               RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
+               RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
                ADVANCE_RING();
        }