X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fradeon%2Fr300_cmdbuf.c;h=c5c2742e41405514364b34bfdbc39c725bc195b7;hb=3374cd1abd478f767aaedf2c21d109596ff0fe72;hp=702df45320f7bcc897439e8b38a1f8d74200c5c4;hpb=c0e09200dc0813972442e550a5905a132768e56c;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index 702df45..c5c2742 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -33,10 +33,13 @@ #include "drmP.h" #include "drm.h" +#include "drm_buffer.h" #include "radeon_drm.h" #include "radeon_drv.h" #include "r300_reg.h" +#include + #define R300_SIMULTANEOUS_CLIPRECTS 4 /* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects @@ -77,6 +80,9 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, return -EFAULT; } + box.x2--; /* Hardware expects inclusive bottom-right corner */ + box.y2--; + if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) { box.x1 = (box.x1) & R300_CLIPRECT_MASK; @@ -95,8 +101,8 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, R300_CLIPRECT_MASK; box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK; - } + OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) | (box.y1 << R300_CLIPRECT_Y_SHIFT)); OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) | @@ -136,6 +142,18 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv, ADVANCE_RING(); } + /* flus cache and wait idle clean after cliprect change */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + ADVANCE_RING(); + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; + return 0; } @@ -166,13 +184,13 @@ void r300_init_reg_flags(struct drm_device *dev) ADD_RANGE(0x21DC, 1); ADD_RANGE(R300_VAP_UNKNOWN_221C, 1); ADD_RANGE(R300_VAP_CLIP_X_0, 4); - ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1); + ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1); ADD_RANGE(R300_VAP_UNKNOWN_2288, 1); ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2); ADD_RANGE(R300_VAP_PVS_CNTL_1, 3); ADD_RANGE(R300_GB_ENABLE, 1); ADD_RANGE(R300_GB_MSPOS0, 5); - ADD_RANGE(R300_TX_CNTL, 1); + ADD_RANGE(R300_TX_INVALTAGS, 1); ADD_RANGE(R300_TX_ENABLE, 1); ADD_RANGE(0x4200, 4); ADD_RANGE(0x4214, 1); @@ -190,6 +208,10 @@ void r300_init_reg_flags(struct drm_device *dev) ADD_RANGE(0x42C0, 2); ADD_RANGE(R300_RS_CNTL_0, 2); + ADD_RANGE(R300_SU_REG_DEST, 1); + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530) + ADD_RANGE(RV530_FG_ZBREG_DEST, 1); + ADD_RANGE(R300_SC_HYPERZ, 2); ADD_RANGE(0x43E8, 1); @@ -215,6 +237,7 @@ void r300_init_reg_flags(struct drm_device *dev) ADD_RANGE(R300_ZB_DEPTHPITCH, 1); ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1); ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13); + ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */ ADD_RANGE(R300_TX_FILTER_0, 16); ADD_RANGE(R300_TX_FILTER1_0, 16); @@ -277,46 +300,42 @@ static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t * int reg; int sz; int i; - int values[64]; + u32 *value; RING_LOCALS; sz = header.packet0.count; reg = (header.packet0.reghi << 8) | header.packet0.reglo; if ((sz > 64) || (sz < 0)) { - DRM_ERROR - ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", - reg, sz); + DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", + reg, sz); return -EINVAL; } + for (i = 0; i < sz; i++) { - values[i] = ((int *)cmdbuf->buf)[i]; switch (r300_reg_flags[(reg >> 2) + i]) { case MARK_SAFE: break; case MARK_CHECK_OFFSET: - if (!radeon_check_offset(dev_priv, (u32) values[i])) { - DRM_ERROR - ("Offset failed range check (reg=%04x sz=%d)\n", - reg, sz); + value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i); + if (!radeon_check_offset(dev_priv, *value)) { + DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", + reg, sz); return -EINVAL; } break; default: DRM_ERROR("Register %04x failed check as flag=%02x\n", - reg + i * 4, r300_reg_flags[(reg >> 2) + i]); + reg + i * 4, r300_reg_flags[(reg >> 2) + i]); return -EINVAL; } } BEGIN_RING(1 + sz); OUT_RING(CP_PACKET0(reg, sz - 1)); - OUT_RING_TABLE(values, sz); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz); ADVANCE_RING(); - cmdbuf->buf += sz * 4; - cmdbuf->bufsz -= sz * 4; - return 0; } @@ -340,7 +359,7 @@ static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv, if (!sz) return 0; - if (sz * 4 > cmdbuf->bufsz) + if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) return -EINVAL; if (reg + sz * 4 >= 0x10000) { @@ -358,12 +377,9 @@ static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv, BEGIN_RING(1 + sz); OUT_RING(CP_PACKET0(reg, sz - 1)); - OUT_RING_TABLE((int *)cmdbuf->buf, sz); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz); ADVANCE_RING(); - cmdbuf->buf += sz * 4; - cmdbuf->bufsz -= sz * 4; - return 0; } @@ -385,22 +401,32 @@ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv, if (!sz) return 0; - if (sz * 16 > cmdbuf->bufsz) + if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer)) return -EINVAL; - BEGIN_RING(5 + sz * 4); - /* Wait for VAP to come to senses.. */ - /* there is no need to emit it multiple times, (only once before VAP is programmed, - but this optimization is for later */ - OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0); + /* VAP is very sensitive so we purge cache before we program it + * and we also flush its state before & after */ + BEGIN_RING(6); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0)); + OUT_RING(0); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; + + BEGIN_RING(3 + sz * 4); OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr); OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1)); - OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4); - + OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4); ADVANCE_RING(); - cmdbuf->buf += sz * 16; - cmdbuf->bufsz -= sz * 16; + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0)); + OUT_RING(0); + ADVANCE_RING(); return 0; } @@ -414,18 +440,24 @@ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv, { RING_LOCALS; - if (8 * 4 > cmdbuf->bufsz) + if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) return -EINVAL; BEGIN_RING(10); OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); - OUT_RING_TABLE((int *)cmdbuf->buf, 8); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8); ADVANCE_RING(); - cmdbuf->buf += 8 * 4; - cmdbuf->bufsz -= 8 * 4; + BEGIN_RING(4); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(R300_RB3D_DC_FLUSH); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* set flush flag */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED; return 0; } @@ -436,28 +468,29 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, { int count, i, k; #define MAX_ARRAY_PACKET 64 - u32 payload[MAX_ARRAY_PACKET]; + u32 *data; u32 narrays; RING_LOCALS; - count = (header >> 16) & 0x3fff; + count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16; if ((count + 1) > MAX_ARRAY_PACKET) { DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count); return -EINVAL; } - memset(payload, 0, MAX_ARRAY_PACKET * 4); - memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4); - /* carefully check packet contents */ - narrays = payload[0]; + /* We have already read the header so advance the buffer. */ + drm_buffer_advance(cmdbuf->buffer, 4); + + narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); k = 0; i = 1; while ((k < narrays) && (i < (count + 1))) { i++; /* skip attribute field */ - if (!radeon_check_offset(dev_priv, payload[i])) { + data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i); + if (!radeon_check_offset(dev_priv, *data)) { DRM_ERROR ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i); @@ -468,7 +501,8 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, if (k == narrays) break; /* have one more to process, they come in pairs */ - if (!radeon_check_offset(dev_priv, payload[i])) { + data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i); + if (!radeon_check_offset(dev_priv, *data)) { DRM_ERROR ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i); @@ -489,30 +523,30 @@ static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv, BEGIN_RING(count + 2); OUT_RING(header); - OUT_RING_TABLE(payload, count + 1); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1); ADVANCE_RING(); - cmdbuf->buf += (count + 2) * 4; - cmdbuf->bufsz -= (count + 2) * 4; - return 0; } static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { - u32 *cmd = (u32 *) cmdbuf->buf; + u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); int count, ret; RING_LOCALS; - count=(cmd[0]>>16) & 0x3fff; - if (cmd[0] & 0x8000) { - u32 offset; + count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16; - if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL + if (*cmd & 0x8000) { + u32 offset; + u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1); + if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { - offset = cmd[2] << 10; + + u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2); + offset = *cmd2 << 10; ret = !radeon_check_offset(dev_priv, offset); if (ret) { DRM_ERROR("Invalid bitblt first offset is %08X\n", offset); @@ -520,9 +554,10 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, } } - if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) && - (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { - offset = cmd[3] << 10; + if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) && + (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) { + u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3); + offset = *cmd3 << 10; ret = !radeon_check_offset(dev_priv, offset); if (ret) { DRM_ERROR("Invalid bitblt second offset is %08X\n", offset); @@ -533,42 +568,82 @@ static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv, } BEGIN_RING(count+2); - OUT_RING(cmd[0]); - OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2); ADVANCE_RING(); - cmdbuf->buf += (count+2)*4; - cmdbuf->bufsz -= (count+2)*4; - return 0; } -static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, - drm_radeon_kcmd_buffer_t *cmdbuf) +static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv, + drm_radeon_kcmd_buffer_t *cmdbuf) { - u32 *cmd = (u32 *) cmdbuf->buf; - int count, ret; + u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); + u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1); + int count; + int expected_count; RING_LOCALS; - count=(cmd[0]>>16) & 0x3fff; + count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16; - if ((cmd[1] & 0x8000ffff) != 0x80000810) { - DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]); - return -EINVAL; - } - ret = !radeon_check_offset(dev_priv, cmd[2]); - if (ret) { - DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]); + expected_count = *cmd1 >> 16; + if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit)) + expected_count = (expected_count+1)/2; + + if (count && count != expected_count) { + DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n", + count, expected_count); return -EINVAL; } BEGIN_RING(count+2); - OUT_RING(cmd[0]); - OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2); ADVANCE_RING(); - cmdbuf->buf += (count+2)*4; - cmdbuf->bufsz -= (count+2)*4; + if (!count) { + drm_r300_cmd_header_t stack_header, *header; + u32 *cmd1, *cmd2, *cmd3; + + if (drm_buffer_unprocessed(cmdbuf->buffer) + < 4*4 + sizeof(stack_header)) { + DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n"); + return -EINVAL; + } + + header = drm_buffer_read_object(cmdbuf->buffer, + sizeof(stack_header), &stack_header); + + cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); + cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1); + cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2); + cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3); + + if (header->header.cmd_type != R300_CMD_PACKET3 || + header->packet3.packet != R300_CMD_PACKET3_RAW || + *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) { + DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n"); + return -EINVAL; + } + + if ((*cmd1 & 0x8000ffff) != 0x80000810) { + DRM_ERROR("Invalid indx_buffer reg address %08X\n", + *cmd1); + return -EINVAL; + } + if (!radeon_check_offset(dev_priv, *cmd2)) { + DRM_ERROR("Invalid indx_buffer offset is %08X\n", + *cmd2); + return -EINVAL; + } + if (*cmd3 != expected_count) { + DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n", + *cmd3, expected_count); + return -EINVAL; + } + + BEGIN_RING(4); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4); + ADVANCE_RING(); + } return 0; } @@ -576,65 +651,72 @@ static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv, static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv, drm_radeon_kcmd_buffer_t *cmdbuf) { - u32 header; + u32 *header; int count; RING_LOCALS; - if (4 > cmdbuf->bufsz) + if (4 > drm_buffer_unprocessed(cmdbuf->buffer)) return -EINVAL; /* Fixme !! This simply emits a packet without much checking. We need to be smarter. */ /* obtain first word - actual packet3 header */ - header = *(u32 *) cmdbuf->buf; + header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); /* Is it packet 3 ? */ - if ((header >> 30) != 0x3) { - DRM_ERROR("Not a packet3 header (0x%08x)\n", header); + if ((*header >> 30) != 0x3) { + DRM_ERROR("Not a packet3 header (0x%08x)\n", *header); return -EINVAL; } - count = (header >> 16) & 0x3fff; + count = (*header >> 16) & 0x3fff; /* Check again now that we know how much data to expect */ - if ((count + 2) * 4 > cmdbuf->bufsz) { + if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) { DRM_ERROR ("Expected packet3 of length %d but have only %d bytes left\n", - (count + 2) * 4, cmdbuf->bufsz); + (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer)); return -EINVAL; } /* Is it a packet type we know about ? */ - switch (header & 0xff00) { + switch (*header & 0xff00) { case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */ - return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header); + return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header); case RADEON_CNTL_BITBLT_MULTI: return r300_emit_bitblt_multi(dev_priv, cmdbuf); - case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */ - return r300_emit_indx_buffer(dev_priv, cmdbuf); - case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */ - case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */ - case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */ + case RADEON_CP_INDX_BUFFER: + DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n"); + return -EINVAL; + case RADEON_CP_3D_DRAW_IMMD_2: + /* triggers drawing using in-packet vertex data */ + case RADEON_CP_3D_DRAW_VBUF_2: + /* triggers drawing of vertex buffers setup elsewhere */ + dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED | + RADEON_PURGE_EMITED); + break; + case RADEON_CP_3D_DRAW_INDX_2: + /* triggers drawing using indices to vertex buffer */ + /* whenever we send vertex we clear flush & purge */ + dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED | + RADEON_PURGE_EMITED); + return r300_emit_draw_indx_2(dev_priv, cmdbuf); case RADEON_WAIT_FOR_IDLE: case RADEON_CP_NOP: /* these packets are safe */ break; default: - DRM_ERROR("Unknown packet3 header (0x%08x)\n", header); + DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header); return -EINVAL; } BEGIN_RING(count + 2); - OUT_RING(header); - OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2); ADVANCE_RING(); - cmdbuf->buf += (count + 2) * 4; - cmdbuf->bufsz -= (count + 2) * 4; - return 0; } @@ -648,8 +730,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, { int n; int ret; - char *orig_buf = cmdbuf->buf; - int orig_bufsz = cmdbuf->bufsz; + int orig_iter = cmdbuf->buffer->iterator; /* This is a do-while-loop so that we run the interior at least once, * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale. @@ -661,8 +742,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, if (ret) return ret; - cmdbuf->buf = orig_buf; - cmdbuf->bufsz = orig_bufsz; + cmdbuf->buffer->iterator = orig_iter; } switch (header.packet3.packet) { @@ -685,9 +765,9 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, break; default: - DRM_ERROR("bad packet3 type %i at %p\n", + DRM_ERROR("bad packet3 type %i at byte %d\n", header.packet3.packet, - cmdbuf->buf - sizeof(header)); + cmdbuf->buffer->iterator - (int)sizeof(header)); return -EINVAL; } @@ -713,17 +793,53 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, */ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) { + uint32_t cache_z, cache_3d, cache_2d; RING_LOCALS; - BEGIN_RING(6); - OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); - OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A); + cache_z = R300_ZC_FLUSH; + cache_2d = R300_RB2D_DC_FLUSH; + cache_3d = R300_RB3D_DC_FLUSH; + if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) { + /* we can purge, primitive where draw since last purge */ + cache_z |= R300_ZC_FREE; + cache_2d |= R300_RB2D_DC_FREE; + cache_3d |= R300_RB3D_DC_FREE; + } + + /* flush & purge zbuffer */ + BEGIN_RING(2); OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); - OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE| - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0)); - OUT_RING(0x0); + OUT_RING(cache_z); + ADVANCE_RING(); + /* flush & purge 3d */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); + OUT_RING(cache_3d); + ADVANCE_RING(); + /* flush & purge texture */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0)); + OUT_RING(0); + ADVANCE_RING(); + /* FIXME: is this one really needed ? */ + BEGIN_RING(2); + OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0)); + OUT_RING(0); + ADVANCE_RING(); + BEGIN_RING(2); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_3D_IDLECLEAN); + ADVANCE_RING(); + /* flush & purge 2d through E2 as RB2D will trigger lockup */ + BEGIN_RING(4); + OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0)); + OUT_RING(cache_2d); + OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_RING(RADEON_WAIT_2D_IDLECLEAN | + RADEON_WAIT_HOST_IDLECLEAN); ADVANCE_RING(); + /* set flush & purge flags */ + dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED; } /** @@ -731,12 +847,12 @@ static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must * be careful about how this function is called. */ -static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf) +static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf) { - drm_radeon_private_t *dev_priv = dev->dev_private; drm_radeon_buf_priv_t *buf_priv = buf->dev_private; + struct drm_radeon_master_private *master_priv = master->driver_priv; - buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; + buf_priv->age = ++master_priv->sarea_priv->last_dispatch; buf->pending = 1; buf->used = 0; } @@ -787,11 +903,13 @@ static int r300_scratch(drm_radeon_private_t *dev_priv, drm_r300_cmd_header_t header) { u32 *ref_age_base; - u32 i, buf_idx, h_pending; + u32 i, *buf_idx, h_pending; + u64 *ptr_addr; + u64 stack_ptr_addr; RING_LOCALS; - if (cmdbuf->bufsz < - (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) { + if (drm_buffer_unprocessed(cmdbuf->buffer) < + (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) { return -EINVAL; } @@ -801,35 +919,35 @@ static int r300_scratch(drm_radeon_private_t *dev_priv, dev_priv->scratch_ages[header.scratch.reg]++; - ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf); - - cmdbuf->buf += sizeof(u64); - cmdbuf->bufsz -= sizeof(u64); + ptr_addr = drm_buffer_read_object(cmdbuf->buffer, + sizeof(stack_ptr_addr), &stack_ptr_addr); + ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr); for (i=0; i < header.scratch.n_bufs; i++) { - buf_idx = *(u32 *)cmdbuf->buf; - buf_idx *= 2; /* 8 bytes per buf */ + buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); + *buf_idx *= 2; /* 8 bytes per buf */ - if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) { + if (DRM_COPY_TO_USER(ref_age_base + *buf_idx, + &dev_priv->scratch_ages[header.scratch.reg], + sizeof(u32))) return -EINVAL; - } - if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) { + if (DRM_COPY_FROM_USER(&h_pending, + ref_age_base + *buf_idx + 1, + sizeof(u32))) return -EINVAL; - } - if (h_pending == 0) { + if (h_pending == 0) return -EINVAL; - } h_pending--; - if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) { + if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1, + &h_pending, + sizeof(u32))) return -EINVAL; - } - cmdbuf->buf += sizeof(buf_idx); - cmdbuf->bufsz -= sizeof(buf_idx); + drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx)); } BEGIN_RING(2); @@ -852,7 +970,7 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv, int sz; int addr; int type; - int clamp; + int isclamp; int stride; RING_LOCALS; @@ -861,29 +979,26 @@ static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv, addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo; type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); - clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); addr |= (type << 16); - addr |= (clamp << 17); + addr |= (isclamp << 17); stride = type ? 4 : 6; DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type); if (!sz) return 0; - if (sz * stride * 4 > cmdbuf->bufsz) + if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) return -EINVAL; BEGIN_RING(3 + sz * stride); OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr); OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1)); - OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride); + OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride); ADVANCE_RING(); - cmdbuf->buf += sz * stride * 4; - cmdbuf->bufsz -= sz * stride * 4; - return 0; } @@ -898,6 +1013,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, drm_radeon_kcmd_buffer_t *cmdbuf) { drm_radeon_private_t *dev_priv = dev->dev_private; + struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv; struct drm_device_dma *dma = dev->dma; struct drm_buf *buf = NULL; int emit_dispatch_age = 0; @@ -905,8 +1021,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, DRM_DEBUG("\n"); - /* See the comment above r300_emit_begin3d for why this call must be here, - * and what the cleanup gotos are for. */ + /* pacify */ r300_pacify(dev_priv); if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) { @@ -915,19 +1030,18 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, goto cleanup; } - while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) { + while (drm_buffer_unprocessed(cmdbuf->buffer) + >= sizeof(drm_r300_cmd_header_t)) { int idx; - drm_r300_cmd_header_t header; - - header.u = *(unsigned int *)cmdbuf->buf; + drm_r300_cmd_header_t *header, stack_header; - cmdbuf->buf += sizeof(header); - cmdbuf->bufsz -= sizeof(header); + header = drm_buffer_read_object(cmdbuf->buffer, + sizeof(stack_header), &stack_header); - switch (header.header.cmd_type) { + switch (header->header.cmd_type) { case R300_CMD_PACKET0: DRM_DEBUG("R300_CMD_PACKET0\n"); - ret = r300_emit_packet0(dev_priv, cmdbuf, header); + ret = r300_emit_packet0(dev_priv, cmdbuf, *header); if (ret) { DRM_ERROR("r300_emit_packet0 failed\n"); goto cleanup; @@ -936,7 +1050,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, case R300_CMD_VPU: DRM_DEBUG("R300_CMD_VPU\n"); - ret = r300_emit_vpu(dev_priv, cmdbuf, header); + ret = r300_emit_vpu(dev_priv, cmdbuf, *header); if (ret) { DRM_ERROR("r300_emit_vpu failed\n"); goto cleanup; @@ -945,7 +1059,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, case R300_CMD_PACKET3: DRM_DEBUG("R300_CMD_PACKET3\n"); - ret = r300_emit_packet3(dev_priv, cmdbuf, header); + ret = r300_emit_packet3(dev_priv, cmdbuf, *header); if (ret) { DRM_ERROR("r300_emit_packet3 failed\n"); goto cleanup; @@ -979,8 +1093,8 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, int i; RING_LOCALS; - BEGIN_RING(header.delay.count); - for (i = 0; i < header.delay.count; i++) + BEGIN_RING(header->delay.count); + for (i = 0; i < header->delay.count; i++) OUT_RING(RADEON_CP_PACKET2); ADVANCE_RING(); } @@ -988,7 +1102,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, case R300_CMD_DMA_DISCARD: DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); - idx = header.dma.buf_idx; + idx = header->dma.buf_idx; if (idx < 0 || idx >= dma->buf_count) { DRM_ERROR("buffer index %d (of %d max)\n", idx, dma->buf_count - 1); @@ -1006,17 +1120,17 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, } emit_dispatch_age = 1; - r300_discard_buffer(dev, buf); + r300_discard_buffer(dev, file_priv->master, buf); break; case R300_CMD_WAIT: DRM_DEBUG("R300_CMD_WAIT\n"); - r300_cmd_wait(dev_priv, header); + r300_cmd_wait(dev_priv, *header); break; case R300_CMD_SCRATCH: DRM_DEBUG("R300_CMD_SCRATCH\n"); - ret = r300_scratch(dev_priv, cmdbuf, header); + ret = r300_scratch(dev_priv, cmdbuf, *header); if (ret) { DRM_ERROR("r300_scratch failed\n"); goto cleanup; @@ -1030,16 +1144,16 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, goto cleanup; } DRM_DEBUG("R300_CMD_R500FP\n"); - ret = r300_emit_r500fp(dev_priv, cmdbuf, header); + ret = r300_emit_r500fp(dev_priv, cmdbuf, *header); if (ret) { DRM_ERROR("r300_emit_r500fp failed\n"); goto cleanup; } break; default: - DRM_ERROR("bad cmd_type %i at %p\n", - header.header.cmd_type, - cmdbuf->buf - sizeof(header)); + DRM_ERROR("bad cmd_type %i at byte %d\n", + header->header.cmd_type, + cmdbuf->buffer->iterator - (int)sizeof(*header)); ret = -EINVAL; goto cleanup; } @@ -1061,7 +1175,7 @@ int r300_do_cp_cmdbuf(struct drm_device *dev, /* Emit the vertex buffer age */ BEGIN_RING(2); - RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch); + RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch); ADVANCE_RING(); }