1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
3 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
26 * Gareth Hughes <gareth@valinux.com>
27 * Kevin E. Martin <martin@valinux.com>
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
36 /* ================================================================
37 * Helper functions for client state checking and fixup
/*
 * Validate a client-supplied card address space offset and, where an
 * old-style (zero-based) offset is detected, rewrite it in place so it
 * lands inside the framebuffer or the GART aperture.
 *
 * Returns 0 when the (possibly fixed-up) offset is in bounds, otherwise
 * DRM_ERR(EINVAL).
 */
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
42 drm_file_t * filp_priv,
46 struct drm_radeon_driver_file_fields *radeon_priv;
48 /* Hrm ... the story of the offset ... So this function converts
49 * the various ideas of what userland clients might have for an
50 * offset in the card address space into an offset into the card
51 * address space :) So with a sane client, it should just keep
52 * the value intact and just do some boundary checking. However,
53 * not all clients are sane. Some older clients pass us 0 based
54 * offsets relative to the start of the framebuffer and some may
55 * assume the AGP aperture is appended to the framebuffer, so we
56 * try to detect those cases and fix them up.
58 * Note: It might be a good idea here to make sure the offset lands
59 * in some "allowed" area to protect things like the PCIE GART...
62 /* First, the best case, the offset already lands in either the
63 * framebuffer or the GART mapped space
65 if ((off >= dev_priv->fb_location &&
66 off < (dev_priv->fb_location + dev_priv->fb_size)) ||
67 (off >= dev_priv->gart_vm_start &&
68 off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
71 /* Ok, that didn't happen... now check if we have a zero based
72 * offset that fits in the framebuffer + gart space, apply the
73 * magic offset we get from SETPARAM or calculated from fb_location
75 if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76 radeon_priv = filp_priv->driver_priv;
77 off += radeon_priv->radeon_fb_delta;
80 /* Finally, assume we aimed at a GART offset if beyond the fb */
81 if (off > (dev_priv->fb_location + dev_priv->fb_size))
82 off = off - (dev_priv->fb_location + dev_priv->fb_size) +
83 dev_priv->gart_vm_start;
85 /* Now recheck and fail if out of bounds */
86 if ((off >= dev_priv->fb_location &&
87 off < (dev_priv->fb_location + dev_priv->fb_size)) ||
88 (off >= dev_priv->gart_vm_start &&
89 off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
90 DRM_DEBUG("offset fixed up to 0x%x\n", off);
94 return DRM_ERR(EINVAL);
/*
 * Validate (and fix up in place where needed) the memory offsets embedded
 * in a client-supplied state packet, dispatching on the packet ID.
 * Packets that carry no offsets are accepted as-is; an unknown packet ID
 * is rejected with DRM_ERR(EINVAL).
 */
97 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
99 drm_file_t * filp_priv,
104 case RADEON_EMIT_PP_MISC:
/* The depth buffer offset is embedded in the PP_MISC register run. */
105 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
106 &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
107 DRM_ERROR("Invalid depth buffer offset\n");
108 return DRM_ERR(EINVAL);
112 case RADEON_EMIT_PP_CNTL:
/* The colour buffer offset is embedded in the PP_CNTL register run. */
113 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
114 &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
115 DRM_ERROR("Invalid colour buffer offset\n");
116 return DRM_ERR(EINVAL);
120 case R200_EMIT_PP_TXOFFSET_0:
121 case R200_EMIT_PP_TXOFFSET_1:
122 case R200_EMIT_PP_TXOFFSET_2:
123 case R200_EMIT_PP_TXOFFSET_3:
124 case R200_EMIT_PP_TXOFFSET_4:
125 case R200_EMIT_PP_TXOFFSET_5:
126 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
128 DRM_ERROR("Invalid R200 texture offset\n");
129 return DRM_ERR(EINVAL);
133 case RADEON_EMIT_PP_TXFILTER_0:
134 case RADEON_EMIT_PP_TXFILTER_1:
135 case RADEON_EMIT_PP_TXFILTER_2:
/* R100-style texture state: the offset sits inside the TXFILTER run. */
136 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
137 &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
138 DRM_ERROR("Invalid R100 texture offset\n");
139 return DRM_ERR(EINVAL);
143 case R200_EMIT_PP_CUBIC_OFFSETS_0:
144 case R200_EMIT_PP_CUBIC_OFFSETS_1:
145 case R200_EMIT_PP_CUBIC_OFFSETS_2:
146 case R200_EMIT_PP_CUBIC_OFFSETS_3:
147 case R200_EMIT_PP_CUBIC_OFFSETS_4:
148 case R200_EMIT_PP_CUBIC_OFFSETS_5:{
/* Five face offsets per R200 cubic map — validate each one. */
150 for (i = 0; i < 5; i++) {
151 if (radeon_check_and_fixup_offset(dev_priv,
155 ("Invalid R200 cubic texture offset\n");
156 return DRM_ERR(EINVAL);
162 case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
163 case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
164 case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
/* Five face offsets per R100 cubic map — validate each one. */
166 for (i = 0; i < 5; i++) {
167 if (radeon_check_and_fixup_offset(dev_priv,
171 ("Invalid R100 cubic texture offset\n");
172 return DRM_ERR(EINVAL);
178 case RADEON_EMIT_RB3D_COLORPITCH:
179 case RADEON_EMIT_RE_LINE_PATTERN:
180 case RADEON_EMIT_SE_LINE_WIDTH:
181 case RADEON_EMIT_PP_LUM_MATRIX:
182 case RADEON_EMIT_PP_ROT_MATRIX_0:
183 case RADEON_EMIT_RB3D_STENCILREFMASK:
184 case RADEON_EMIT_SE_VPORT_XSCALE:
185 case RADEON_EMIT_SE_CNTL:
186 case RADEON_EMIT_SE_CNTL_STATUS:
187 case RADEON_EMIT_RE_MISC:
188 case RADEON_EMIT_PP_BORDER_COLOR_0:
189 case RADEON_EMIT_PP_BORDER_COLOR_1:
190 case RADEON_EMIT_PP_BORDER_COLOR_2:
191 case RADEON_EMIT_SE_ZBIAS_FACTOR:
192 case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
193 case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
194 case R200_EMIT_PP_TXCBLEND_0:
195 case R200_EMIT_PP_TXCBLEND_1:
196 case R200_EMIT_PP_TXCBLEND_2:
197 case R200_EMIT_PP_TXCBLEND_3:
198 case R200_EMIT_PP_TXCBLEND_4:
199 case R200_EMIT_PP_TXCBLEND_5:
200 case R200_EMIT_PP_TXCBLEND_6:
201 case R200_EMIT_PP_TXCBLEND_7:
202 case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
203 case R200_EMIT_TFACTOR_0:
204 case R200_EMIT_VTX_FMT_0:
205 case R200_EMIT_VAP_CTL:
206 case R200_EMIT_MATRIX_SELECT_0:
207 case R200_EMIT_TEX_PROC_CTL_2:
208 case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
209 case R200_EMIT_PP_TXFILTER_0:
210 case R200_EMIT_PP_TXFILTER_1:
211 case R200_EMIT_PP_TXFILTER_2:
212 case R200_EMIT_PP_TXFILTER_3:
213 case R200_EMIT_PP_TXFILTER_4:
214 case R200_EMIT_PP_TXFILTER_5:
215 case R200_EMIT_VTE_CNTL:
216 case R200_EMIT_OUTPUT_VTX_COMP_SEL:
217 case R200_EMIT_PP_TAM_DEBUG3:
218 case R200_EMIT_PP_CNTL_X:
219 case R200_EMIT_RB3D_DEPTHXY_OFFSET:
220 case R200_EMIT_RE_AUX_SCISSOR_CNTL:
221 case R200_EMIT_RE_SCISSOR_TL_0:
222 case R200_EMIT_RE_SCISSOR_TL_1:
223 case R200_EMIT_RE_SCISSOR_TL_2:
224 case R200_EMIT_SE_VAP_CNTL_STATUS:
225 case R200_EMIT_SE_VTX_STATE_CNTL:
226 case R200_EMIT_RE_POINTSIZE:
227 case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
228 case R200_EMIT_PP_CUBIC_FACES_0:
229 case R200_EMIT_PP_CUBIC_FACES_1:
230 case R200_EMIT_PP_CUBIC_FACES_2:
231 case R200_EMIT_PP_CUBIC_FACES_3:
232 case R200_EMIT_PP_CUBIC_FACES_4:
233 case R200_EMIT_PP_CUBIC_FACES_5:
234 case RADEON_EMIT_PP_TEX_SIZE_0:
235 case RADEON_EMIT_PP_TEX_SIZE_1:
236 case RADEON_EMIT_PP_TEX_SIZE_2:
237 case R200_EMIT_RB3D_BLENDCOLOR:
238 case R200_EMIT_TCL_POINT_SPRITE_CNTL:
239 case RADEON_EMIT_PP_CUBIC_FACES_0:
240 case RADEON_EMIT_PP_CUBIC_FACES_1:
241 case RADEON_EMIT_PP_CUBIC_FACES_2:
242 case R200_EMIT_PP_TRI_PERF_CNTL:
243 case R200_EMIT_PP_AFS_0:
244 case R200_EMIT_PP_AFS_1:
245 case R200_EMIT_ATF_TFACTOR:
246 case R200_EMIT_PP_TXCTLALL_0:
247 case R200_EMIT_PP_TXCTLALL_1:
248 case R200_EMIT_PP_TXCTLALL_2:
249 case R200_EMIT_PP_TXCTLALL_3:
250 case R200_EMIT_PP_TXCTLALL_4:
251 case R200_EMIT_PP_TXCTLALL_5:
252 /* These packets don't contain memory offsets */
/* Unknown ID: reject rather than pass unchecked data to the hardware. */
256 DRM_ERROR("Unknown state packet ID %d\n", id);
257 return DRM_ERR(EINVAL);
/*
 * Validate a type-3 CP packet from a client command buffer: check the
 * header type, verify the advertised packet size fits the supplied data,
 * and fix up any pitch/offset dwords for blit-style packets.
 *
 * On success *cmdsz receives the packet length in dwords; returns 0, or
 * DRM_ERR(EINVAL) on any validation failure.
 */
263 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
265 drm_file_t *filp_priv,
266 drm_radeon_kcmd_buffer_t *
270 u32 *cmd = (u32 *) cmdbuf->buf;
/* Packet length = count field (bits 29:16 of the header) + 2 header dwords. */
272 *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
274 if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
275 DRM_ERROR("Not a type 3 packet\n");
276 return DRM_ERR(EINVAL);
/* Reject packets that claim more dwords than the buffer actually holds. */
279 if (4 * *cmdsz > cmdbuf->bufsz) {
280 DRM_ERROR("Packet size larger than size of data provided\n");
281 return DRM_ERR(EINVAL);
284 /* Check client state and fix it up if necessary */
285 if (cmd[0] & 0x8000) { /* MSB of opcode: next DWORD GUI_CNTL */
288 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
289 | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
/* Offset is stored in 1KB units in the low 22 bits of the dword. */
290 offset = cmd[2] << 10;
291 if (radeon_check_and_fixup_offset
292 (dev_priv, filp_priv, &offset)) {
293 DRM_ERROR("Invalid first packet offset\n");
294 return DRM_ERR(EINVAL);
296 cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
/* When both src and dst carry pitch/offset, a second dword follows. */
299 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
300 (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
301 offset = cmd[3] << 10;
302 if (radeon_check_and_fixup_offset
303 (dev_priv, filp_priv, &offset)) {
304 DRM_ERROR("Invalid second packet offset\n");
305 return DRM_ERR(EINVAL);
307 cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
314 /* ================================================================
315 * CP hardware state programming functions
/*
 * Program the hardware clip rectangle (RE_TOP_LEFT / RE_WIDTH_HEIGHT)
 * from a DRM clip box.
 * NOTE(review): x2/y2 appear to be exclusive bounds, hence the "- 1"
 * when writing the bottom-right corner — confirm against the box setup.
 */
318 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
319 drm_clip_rect_t * box)
323 DRM_DEBUG(" box: x1=%d y1=%d x2=%d y2=%d\n",
324 box->x1, box->y1, box->x2, box->y2);
327 OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
328 OUT_RING((box->y1 << 16) | box->x1);
329 OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
330 OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
/*
 * Emit the (pre-R200) 3D state to the CP ring, one register group per
 * dirty-mask bit.  Depth/colour/texture offsets supplied by the client
 * are validated (and possibly fixed up) before being written to the ring.
 *
 * Returns 0 on success, DRM_ERR(EINVAL) if any offset fails validation.
 */
336 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
337 drm_file_t * filp_priv,
338 drm_radeon_context_regs_t * ctx,
339 drm_radeon_texture_regs_t * tex,
343 DRM_DEBUG("dirty=0x%08x\n", dirty);
345 if (dirty & RADEON_UPLOAD_CONTEXT) {
346 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
347 &ctx->rb3d_depthoffset)) {
348 DRM_ERROR("Invalid depth buffer offset\n");
349 return DRM_ERR(EINVAL);
352 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
353 &ctx->rb3d_coloroffset)) {
/* NOTE(review): this checks the colour buffer offset, but the error
 * message below says "depth" — looks like a copy-paste slip. */
354 DRM_ERROR("Invalid depth buffer offset\n");
355 return DRM_ERR(EINVAL);
359 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
360 OUT_RING(ctx->pp_misc);
361 OUT_RING(ctx->pp_fog_color);
362 OUT_RING(ctx->re_solid_color);
363 OUT_RING(ctx->rb3d_blendcntl);
364 OUT_RING(ctx->rb3d_depthoffset);
365 OUT_RING(ctx->rb3d_depthpitch);
366 OUT_RING(ctx->rb3d_zstencilcntl);
367 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
368 OUT_RING(ctx->pp_cntl);
369 OUT_RING(ctx->rb3d_cntl);
370 OUT_RING(ctx->rb3d_coloroffset);
371 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
372 OUT_RING(ctx->rb3d_colorpitch);
376 if (dirty & RADEON_UPLOAD_VERTFMT) {
378 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
379 OUT_RING(ctx->se_coord_fmt);
383 if (dirty & RADEON_UPLOAD_LINE) {
385 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
386 OUT_RING(ctx->re_line_pattern);
387 OUT_RING(ctx->re_line_state);
388 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
389 OUT_RING(ctx->se_line_width);
393 if (dirty & RADEON_UPLOAD_BUMPMAP) {
395 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
396 OUT_RING(ctx->pp_lum_matrix);
397 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
398 OUT_RING(ctx->pp_rot_matrix_0);
399 OUT_RING(ctx->pp_rot_matrix_1);
403 if (dirty & RADEON_UPLOAD_MASKS) {
405 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
406 OUT_RING(ctx->rb3d_stencilrefmask);
407 OUT_RING(ctx->rb3d_ropcntl);
408 OUT_RING(ctx->rb3d_planemask);
412 if (dirty & RADEON_UPLOAD_VIEWPORT) {
414 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
415 OUT_RING(ctx->se_vport_xscale);
416 OUT_RING(ctx->se_vport_xoffset);
417 OUT_RING(ctx->se_vport_yscale);
418 OUT_RING(ctx->se_vport_yoffset);
419 OUT_RING(ctx->se_vport_zscale);
420 OUT_RING(ctx->se_vport_zoffset);
424 if (dirty & RADEON_UPLOAD_SETUP) {
426 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
427 OUT_RING(ctx->se_cntl);
428 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
429 OUT_RING(ctx->se_cntl_status);
433 if (dirty & RADEON_UPLOAD_MISC) {
435 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
436 OUT_RING(ctx->re_misc);
/* Texture units 0-2: validate the offset, then emit the full filter/
 * format/blend register run plus the border colour for each unit. */
440 if (dirty & RADEON_UPLOAD_TEX0) {
441 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
442 &tex[0].pp_txoffset)) {
443 DRM_ERROR("Invalid texture offset for unit 0\n");
444 return DRM_ERR(EINVAL);
448 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
449 OUT_RING(tex[0].pp_txfilter);
450 OUT_RING(tex[0].pp_txformat);
451 OUT_RING(tex[0].pp_txoffset);
452 OUT_RING(tex[0].pp_txcblend);
453 OUT_RING(tex[0].pp_txablend);
454 OUT_RING(tex[0].pp_tfactor);
455 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
456 OUT_RING(tex[0].pp_border_color);
460 if (dirty & RADEON_UPLOAD_TEX1) {
461 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
462 &tex[1].pp_txoffset)) {
463 DRM_ERROR("Invalid texture offset for unit 1\n");
464 return DRM_ERR(EINVAL);
468 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
469 OUT_RING(tex[1].pp_txfilter);
470 OUT_RING(tex[1].pp_txformat);
471 OUT_RING(tex[1].pp_txoffset);
472 OUT_RING(tex[1].pp_txcblend);
473 OUT_RING(tex[1].pp_txablend);
474 OUT_RING(tex[1].pp_tfactor);
475 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
476 OUT_RING(tex[1].pp_border_color);
480 if (dirty & RADEON_UPLOAD_TEX2) {
481 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
482 &tex[2].pp_txoffset)) {
483 DRM_ERROR("Invalid texture offset for unit 2\n");
484 return DRM_ERR(EINVAL);
488 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
489 OUT_RING(tex[2].pp_txfilter);
490 OUT_RING(tex[2].pp_txformat);
491 OUT_RING(tex[2].pp_txoffset);
492 OUT_RING(tex[2].pp_txcblend);
493 OUT_RING(tex[2].pp_txablend);
494 OUT_RING(tex[2].pp_tfactor);
495 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
496 OUT_RING(tex[2].pp_border_color);
/*
 * Newer-interface state emit: handles the extra context2 registers
 * (z-bias factor/constant) if dirty, then delegates the rest of the
 * state upload to radeon_emit_state().  Returns its result.
 */
505 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
506 drm_file_t * filp_priv,
507 drm_radeon_state_t * state)
511 if (state->dirty & RADEON_UPLOAD_ZBIAS) {
513 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
514 OUT_RING(state->context2.se_zbias_factor);
515 OUT_RING(state->context2.se_zbias_constant);
519 return radeon_emit_state(dev_priv, filp_priv, &state->context,
520 state->tex, state->dirty);
523 /* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
524 * 1.3 cmdbuffers allow all previous state to be updated as well as
525 * the tcl scalar and vector areas.
/*
 * Per-packet dispatch table: base register, register count, and a name
 * for debugging.
 * NOTE(review): the entry order appears to mirror the RADEON_EMIT_* /
 * R200_EMIT_* packet IDs used by radeon_check_and_fixup_packets() —
 * confirm before inserting or reordering entries.
 */
531 } packet[RADEON_MAX_STATE_PACKETS] = {
532 {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
533 {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
534 {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
535 {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
536 {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
537 {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
538 {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
539 {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
540 {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
541 {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
542 {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
543 {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
544 {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
545 {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
546 {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
547 {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
548 {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
549 {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
550 {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
551 {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
552 {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
553 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
554 {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
555 {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
556 {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
557 {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
558 {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
559 {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
560 {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
561 {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
562 {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
563 {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
564 {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
565 {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
566 {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
567 {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
568 {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
569 {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
570 {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
571 {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
572 {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
573 {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
574 {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
575 {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
576 {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
577 {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
578 {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
579 {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
580 {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
581 {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
582 {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
583 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
584 {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
585 {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
586 {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
587 {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
588 {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
589 {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
590 {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
591 {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
592 {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
593 {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
594 {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
595 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
596 {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
597 {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
598 {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
599 {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
600 {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
601 {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
602 {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
603 {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
604 {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
605 {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
606 {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
607 {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
608 {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
609 {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
610 {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
611 {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
612 {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
613 {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
614 {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
615 {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
616 {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
617 {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
618 {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
619 {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
620 {R200_PP_AFS_0, 32, "R200_PP_AFS_0"}, /* 85 */
621 {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
622 {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
623 {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
624 {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
625 {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
626 {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
627 {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
628 {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
631 /* ================================================================
632 * Performance monitoring functions
/*
 * Fill a small w x h rectangle at (x, y) — offset by the first clip
 * box's top-left corner — with a solid RGB colour via a 2D paint blit.
 * Used to draw the on-screen performance monitoring boxes.
 */
635 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
636 int x, int y, int w, int h, int r, int g, int b)
641 x += dev_priv->sarea_priv->boxes[0].x1;
642 y += dev_priv->sarea_priv->boxes[0].y1;
/* Pack the 8-bit RGB components into the framebuffer's pixel format. */
644 switch (dev_priv->color_fmt) {
645 case RADEON_COLOR_FORMAT_RGB565:
646 color = (((r & 0xf8) << 8) |
647 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
649 case RADEON_COLOR_FORMAT_ARGB8888:
651 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
656 RADEON_WAIT_UNTIL_3D_IDLE();
657 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
658 OUT_RING(0xffffffff);
663 OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
664 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
665 RADEON_GMC_BRUSH_SOLID_COLOR |
666 (dev_priv->color_fmt << 8) |
667 RADEON_GMC_SRC_DATATYPE_COLOR |
668 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
/* Draw into whichever buffer is currently the front when page flipping. */
670 if (dev_priv->page_flipping && dev_priv->current_page == 1) {
671 OUT_RING(dev_priv->front_pitch_offset);
673 OUT_RING(dev_priv->back_pitch_offset);
678 OUT_RING((x << 16) | y);
679 OUT_RING((w << 16) | h);
/*
 * Draw coloured debug boxes summarizing this frame's statistics
 * (page flips, idle waits, texture loads, buffer usage), then reset
 * the accumulated stats for the next frame.
 */
684 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
686 /* Collapse various things into a wait flag -- trying to
687 * guess if userspace slept -- better just to have them tell us.
689 if (dev_priv->stats.last_frame_reads > 1 ||
690 dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
691 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
694 if (dev_priv->stats.freelist_loops) {
695 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
698 /* Purple box for page flipping
700 if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
701 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
703 /* Red box if we have to wait for idle at any point
705 if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
706 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
708 /* Blue box: lost context?
711 /* Yellow box for texture swaps
713 if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
714 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
716 /* Green box if hardware never idles (as far as we can tell)
718 if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
719 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
721 /* Draw bars indicating number of buffers allocated
722 * (not a great measure, easily confused)
724 if (dev_priv->stats.requested_bufs) {
/* Clamp the bar width so it stays on screen. */
725 if (dev_priv->stats.requested_bufs > 100)
726 dev_priv->stats.requested_bufs = 100;
728 radeon_clear_box(dev_priv, 4, 16,
729 dev_priv->stats.requested_bufs, 4,
/* Start the next frame's statistics from a clean slate. */
733 memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
737 /* ================================================================
738 * CP command dispatch functions
741 static void radeon_cp_dispatch_clear(drm_device_t * dev,
742 drm_radeon_clear_t * clear,
743 drm_radeon_clear_rect_t * depth_boxes)
745 drm_radeon_private_t *dev_priv = dev->dev_private;
746 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
747 drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
748 int nbox = sarea_priv->nbox;
749 drm_clip_rect_t *pbox = sarea_priv->boxes;
750 unsigned int flags = clear->flags;
751 u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
754 DRM_DEBUG("flags = 0x%x\n", flags);
756 dev_priv->stats.clears++;
758 if (dev_priv->page_flipping && dev_priv->current_page == 1) {
759 unsigned int tmp = flags;
761 flags &= ~(RADEON_FRONT | RADEON_BACK);
762 if (tmp & RADEON_FRONT)
763 flags |= RADEON_BACK;
764 if (tmp & RADEON_BACK)
765 flags |= RADEON_FRONT;
768 if (flags & (RADEON_FRONT | RADEON_BACK)) {
772 /* Ensure the 3D stream is idle before doing a
773 * 2D fill to clear the front or back buffer.
775 RADEON_WAIT_UNTIL_3D_IDLE();
777 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
778 OUT_RING(clear->color_mask);
782 /* Make sure we restore the 3D state next time.
784 dev_priv->sarea_priv->ctx_owner = 0;
786 for (i = 0; i < nbox; i++) {
789 int w = pbox[i].x2 - x;
790 int h = pbox[i].y2 - y;
792 DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
795 if (flags & RADEON_FRONT) {
799 (RADEON_CNTL_PAINT_MULTI, 4));
800 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
801 RADEON_GMC_BRUSH_SOLID_COLOR |
804 RADEON_GMC_SRC_DATATYPE_COLOR |
806 RADEON_GMC_CLR_CMP_CNTL_DIS);
808 OUT_RING(dev_priv->front_pitch_offset);
809 OUT_RING(clear->clear_color);
811 OUT_RING((x << 16) | y);
812 OUT_RING((w << 16) | h);
817 if (flags & RADEON_BACK) {
821 (RADEON_CNTL_PAINT_MULTI, 4));
822 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
823 RADEON_GMC_BRUSH_SOLID_COLOR |
826 RADEON_GMC_SRC_DATATYPE_COLOR |
828 RADEON_GMC_CLR_CMP_CNTL_DIS);
830 OUT_RING(dev_priv->back_pitch_offset);
831 OUT_RING(clear->clear_color);
833 OUT_RING((x << 16) | y);
834 OUT_RING((w << 16) | h);
842 /* no docs available, based on reverse engineering by Stephane Marchesin */
843 if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
844 && (flags & RADEON_CLEAR_FASTZ)) {
847 int depthpixperline =
848 dev_priv->depth_fmt ==
849 RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
855 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
856 ((clear->depth_mask & 0xff) << 24);
858 /* Make sure we restore the 3D state next time.
859 * we haven't touched any "normal" state - still need this?
861 dev_priv->sarea_priv->ctx_owner = 0;
863 if ((dev_priv->flags & CHIP_HAS_HIERZ)
864 && (flags & RADEON_USE_HIERZ)) {
865 /* FIXME : reverse engineer that for Rx00 cards */
866 /* FIXME : the mask supposedly contains low-res z values. So can't set
867 just to the max (0xff? or actually 0x3fff?), need to take z clear
868 value into account? */
869 /* pattern seems to work for r100, though get slight
870 rendering errors with glxgears. If hierz is not enabled for r100,
871 only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
872 other ones are ignored, and the same clear mask can be used. That's
873 very different behaviour than R200 which needs different clear mask
874 and different number of tiles to clear if hierz is enabled or not !?!
876 clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
878 /* clear mask : chooses the clearing pattern.
879 rv250: could be used to clear only parts of macrotiles
880 (but that would get really complicated...)?
881 bit 0 and 1 (either or both of them ?!?!) are used to
882 not clear tile (or maybe one of the bits indicates if the tile is
883 compressed or not), bit 2 and 3 to not clear tile 1,...,.
884 Pattern is as follows:
885 | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
886 bits -------------------------------------------------
887 | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
888 rv100: clearmask covers 2x8 4x1 tiles, but one clear still
889 covers 256 pixels ?!?
895 RADEON_WAIT_UNTIL_2D_IDLE();
896 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
897 tempRB3D_DEPTHCLEARVALUE);
898 /* what offset is this exactly ? */
899 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
900 /* need ctlstat, otherwise get some strange black flickering */
901 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
902 RADEON_RB3D_ZC_FLUSH_ALL);
905 for (i = 0; i < nbox; i++) {
906 int tileoffset, nrtilesx, nrtilesy, j;
907 /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
908 if ((dev_priv->flags & CHIP_HAS_HIERZ)
909 && !(dev_priv->microcode_version == UCODE_R200)) {
910 /* FIXME : figure this out for r200 (when hierz is enabled). Or
911 maybe r200 actually doesn't need to put the low-res z value into
912 the tile cache like r100, but just needs to clear the hi-level z-buffer?
913 Works for R100, both with hierz and without.
914 R100 seems to operate on 2x1 8x8 tiles, but...
915 odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
916 problematic with resolutions which are not 64 pix aligned? */
918 ((pbox[i].y1 >> 3) * depthpixperline +
921 ((pbox[i].x2 & ~63) -
922 (pbox[i].x1 & ~63)) >> 4;
924 (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
925 for (j = 0; j <= nrtilesy; j++) {
928 (RADEON_3D_CLEAR_ZMASK, 2));
930 OUT_RING(tileoffset * 8);
931 /* the number of tiles to clear */
932 OUT_RING(nrtilesx + 4);
933 /* clear mask : chooses the clearing pattern. */
936 tileoffset += depthpixperline >> 6;
938 } else if (dev_priv->microcode_version == UCODE_R200) {
939 /* works for rv250. */
940 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
942 ((pbox[i].y1 >> 3) * depthpixperline +
945 (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
947 (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
948 for (j = 0; j <= nrtilesy; j++) {
951 (RADEON_3D_CLEAR_ZMASK, 2));
953 /* judging by the first tile offset needed, could possibly
954 directly address/clear 4x4 tiles instead of 8x2 * 4x4
955 macro tiles, though would still need clear mask for
956 right/bottom if truly 4x4 granularity is desired ? */
957 OUT_RING(tileoffset * 16);
958 /* the number of tiles to clear */
959 OUT_RING(nrtilesx + 1);
960 /* clear mask : chooses the clearing pattern. */
963 tileoffset += depthpixperline >> 5;
965 } else { /* rv 100 */
966 /* rv100 might not need 64 pix alignment, who knows */
967 /* offsets are, hmm, weird */
969 ((pbox[i].y1 >> 4) * depthpixperline +
972 ((pbox[i].x2 & ~63) -
973 (pbox[i].x1 & ~63)) >> 4;
975 (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
976 for (j = 0; j <= nrtilesy; j++) {
979 (RADEON_3D_CLEAR_ZMASK, 2));
980 OUT_RING(tileoffset * 128);
981 /* the number of tiles to clear */
982 OUT_RING(nrtilesx + 4);
983 /* clear mask : chooses the clearing pattern. */
986 tileoffset += depthpixperline >> 6;
991 /* TODO don't always clear all hi-level z tiles */
992 if ((dev_priv->flags & CHIP_HAS_HIERZ)
993 && (dev_priv->microcode_version == UCODE_R200)
994 && (flags & RADEON_USE_HIERZ))
995 /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
996 /* FIXME : the mask supposedly contains low-res z values. So can't set
997 just to the max (0xff? or actually 0x3fff?), need to take z clear
998 value into account? */
1001 OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1002 OUT_RING(0x0); /* First tile */
1004 OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1009 /* We have to clear the depth and/or stencil buffers by
1010 * rendering a quad into just those buffers. Thus, we have to
1011 * make sure the 3D engine is configured correctly.
1013 else if ((dev_priv->microcode_version == UCODE_R200) &&
1014 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1019 int tempRB3D_ZSTENCILCNTL;
1020 int tempRB3D_STENCILREFMASK;
1021 int tempRB3D_PLANEMASK;
1023 int tempSE_VTE_CNTL;
1024 int tempSE_VTX_FMT_0;
1025 int tempSE_VTX_FMT_1;
1026 int tempSE_VAP_CNTL;
1027 int tempRE_AUX_SCISSOR_CNTL;
1032 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1034 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1035 tempRB3D_STENCILREFMASK = 0x0;
1037 tempSE_CNTL = depth_clear->se_cntl;
1041 tempSE_VAP_CNTL = ( /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
1043 SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1045 tempRB3D_PLANEMASK = 0x0;
1047 tempRE_AUX_SCISSOR_CNTL = 0x0;
1050 SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1052 /* Vertex format (X, Y, Z, W) */
1054 SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1055 SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1056 tempSE_VTX_FMT_1 = 0x0;
1059 * Depth buffer specific enables
1061 if (flags & RADEON_DEPTH) {
1062 /* Enable depth buffer */
1063 tempRB3D_CNTL |= RADEON_Z_ENABLE;
1065 /* Disable depth buffer */
1066 tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1070 * Stencil buffer specific enables
1072 if (flags & RADEON_STENCIL) {
1073 tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1074 tempRB3D_STENCILREFMASK = clear->depth_mask;
1076 tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1077 tempRB3D_STENCILREFMASK = 0x00000000;
1080 if (flags & RADEON_USE_COMP_ZBUF) {
1081 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1082 RADEON_Z_DECOMPRESSION_ENABLE;
1084 if (flags & RADEON_USE_HIERZ) {
1085 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1089 RADEON_WAIT_UNTIL_2D_IDLE();
1091 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1092 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1093 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1094 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1095 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1096 tempRB3D_STENCILREFMASK);
1097 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1098 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1099 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1100 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1101 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1102 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1103 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1106 /* Make sure we restore the 3D state next time.
1108 dev_priv->sarea_priv->ctx_owner = 0;
1110 for (i = 0; i < nbox; i++) {
1112 /* Funny that this should be required --
1115 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1118 OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1119 OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1120 RADEON_PRIM_WALK_RING |
1121 (3 << RADEON_NUM_VERTICES_SHIFT)));
1122 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1123 OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1124 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1125 OUT_RING(0x3f800000);
1126 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1127 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1128 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1129 OUT_RING(0x3f800000);
1130 OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1131 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1132 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1133 OUT_RING(0x3f800000);
1136 } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1138 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1140 rb3d_cntl = depth_clear->rb3d_cntl;
1142 if (flags & RADEON_DEPTH) {
1143 rb3d_cntl |= RADEON_Z_ENABLE;
1145 rb3d_cntl &= ~RADEON_Z_ENABLE;
1148 if (flags & RADEON_STENCIL) {
1149 rb3d_cntl |= RADEON_STENCIL_ENABLE;
1150 rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
1152 rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1153 rb3d_stencilrefmask = 0x00000000;
1156 if (flags & RADEON_USE_COMP_ZBUF) {
1157 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1158 RADEON_Z_DECOMPRESSION_ENABLE;
1160 if (flags & RADEON_USE_HIERZ) {
1161 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1165 RADEON_WAIT_UNTIL_2D_IDLE();
1167 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1168 OUT_RING(0x00000000);
1169 OUT_RING(rb3d_cntl);
1171 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1172 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1173 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1174 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1177 /* Make sure we restore the 3D state next time.
1179 dev_priv->sarea_priv->ctx_owner = 0;
1181 for (i = 0; i < nbox; i++) {
1183 /* Funny that this should be required --
1186 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1190 OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1191 OUT_RING(RADEON_VTX_Z_PRESENT |
1192 RADEON_VTX_PKCOLOR_PRESENT);
1193 OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1194 RADEON_PRIM_WALK_RING |
1195 RADEON_MAOS_ENABLE |
1196 RADEON_VTX_FMT_RADEON_MODE |
1197 (3 << RADEON_NUM_VERTICES_SHIFT)));
1199 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1200 OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1201 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1204 OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1205 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1206 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1209 OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1210 OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1211 OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1218 /* Increment the clear counter. The client-side 3D driver must
1219 * wait on this value before performing the clear ioctl. We
1220 * need this because the card's so damned fast...
1222 dev_priv->sarea_priv->last_clear++;
1226 RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1227 RADEON_WAIT_UNTIL_IDLE();
/* Copy the back buffer to the front buffer, one SAREA cliprect at a
 * time, using CNTL_BITBLT_MULTI CP packets.  Honors page flipping by
 * swapping the source/destination pitch-offset pair when the current
 * page is 0.  NOTE(review): this is a numbered listing with some
 * source lines elided between entries.
 */
1232 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1234 drm_radeon_private_t *dev_priv = dev->dev_private;
1235 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1236 int nbox = sarea_priv->nbox;
1237 drm_clip_rect_t *pbox = sarea_priv->boxes;
1242 /* Do some trivial performance monitoring...
1244 if (dev_priv->do_boxes)
1245 radeon_cp_performance_boxes(dev_priv);
1247 /* Wait for the 3D stream to idle before dispatching the bitblt.
1248 * This will prevent data corruption between the two streams.
1252 RADEON_WAIT_UNTIL_3D_IDLE();
/* One blit per cliprect; width/height come from the box corners. */
1256 for (i = 0; i < nbox; i++) {
1259 int w = pbox[i].x2 - x;
1260 int h = pbox[i].y2 - y;
1262 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1266 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1267 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1268 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1269 RADEON_GMC_BRUSH_NONE |
1270 (dev_priv->color_fmt << 8) |
1271 RADEON_GMC_SRC_DATATYPE_COLOR |
1273 RADEON_DP_SRC_SOURCE_MEMORY |
1274 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1276 /* Make this work even if front & back are flipped:
/* Page 0: blit back -> front; otherwise the roles are reversed. */
1278 if (dev_priv->current_page == 0) {
1279 OUT_RING(dev_priv->back_pitch_offset);
1280 OUT_RING(dev_priv->front_pitch_offset);
1282 OUT_RING(dev_priv->front_pitch_offset);
1283 OUT_RING(dev_priv->back_pitch_offset);
/* Same src and dst x,y; then packed width/height. */
1286 OUT_RING((x << 16) | y);
1287 OUT_RING((x << 16) | y);
1288 OUT_RING((w << 16) | h);
1293 /* Increment the frame counter. The client-side 3D driver must
1294 * throttle the framerate by waiting for this value before
1295 * performing the swapbuffer ioctl.
1297 dev_priv->sarea_priv->last_frame++;
1301 RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1302 RADEON_WAIT_UNTIL_2D_IDLE();
/* Perform a page flip: repoint both CRTC scanout offsets at the
 * other color buffer and toggle current_page / pfCurrentPage.
 * (Numbered listing; some source lines are elided.)
 */
1307 static void radeon_cp_dispatch_flip(drm_device_t * dev)
1309 drm_radeon_private_t *dev_priv = dev->dev_private;
1310 drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
/* Flip target: when page 1 is showing, flip back to the front offset. */
1311 int offset = (dev_priv->current_page == 1)
1312 ? dev_priv->front_offset : dev_priv->back_offset;
1314 DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
1316 dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);
1318 /* Do some trivial performance monitoring...
1320 if (dev_priv->do_boxes) {
1321 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1322 radeon_cp_performance_boxes(dev_priv);
1325 /* Update the frame offsets for both CRTCs
1329 RADEON_WAIT_UNTIL_3D_IDLE();
/* CRTC1 base: frame origin scaled by pitch and bytes-per-pixel
 * (color_fmt - 2), rounded down to 8-byte alignment. */
1330 OUT_RING_REG(RADEON_CRTC_OFFSET,
1331 ((sarea->frame.y * dev_priv->front_pitch +
1332 sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1334 OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1339 /* Increment the frame counter. The client-side 3D driver must
1340 * throttle the framerate by waiting for this value before
1341 * performing the swapbuffer ioctl.
1343 dev_priv->sarea_priv->last_frame++;
/* Toggle 0 <-> 1 and publish the new page to the SAREA. */
1344 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1345 1 - dev_priv->current_page;
1349 RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
/* Validate a vertex count against the primitive type; a nonzero
 * return means "bad".  Line lists must have a nonzero even count and
 * triangle-style lists a nonzero multiple of 3.  NOTE(review): the
 * return statements for several cases (POINT, LINE_STRIP, TRI_FAN,
 * TRI_STRIP, default) fall on lines elided from this listing.
 */
1354 static int bad_prim_vertex_nr(int primitive, int nr)
1356 switch (primitive & RADEON_PRIM_TYPE_MASK) {
1357 case RADEON_PRIM_TYPE_NONE:
1358 case RADEON_PRIM_TYPE_POINT:
1360 case RADEON_PRIM_TYPE_LINE:
1361 return (nr & 1) || nr == 0;
1362 case RADEON_PRIM_TYPE_LINE_STRIP:
1364 case RADEON_PRIM_TYPE_TRI_LIST:
1365 case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1366 case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1367 case RADEON_PRIM_TYPE_RECT_LIST:
1368 return nr % 3 || nr == 0;
1369 case RADEON_PRIM_TYPE_TRI_FAN:
1370 case RADEON_PRIM_TYPE_TRI_STRIP:
/* Tail of the TCL primitive descriptor consumed by the vertex and
 * indices dispatch paths below; the typedef's opening lines (and the
 * start/prim fields referenced by those paths) are elided above.
 */
1379 unsigned int finish;
1381 unsigned int numverts;
1382 unsigned int offset;
1383 unsigned int vc_format;
1384 } drm_radeon_tcl_prim_t;
/* Emit a vertex-buffer draw (PRIM_WALK_LIST) for one TCL primitive,
 * re-emitting the commands per cliprect.  Rejects primitives whose
 * vertex count fails bad_prim_vertex_nr().
 */
1386 static void radeon_cp_dispatch_vertex(drm_device_t * dev,
1388 drm_radeon_tcl_prim_t * prim)
1390 drm_radeon_private_t *dev_priv = dev->dev_private;
1391 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
/* GPU address of the vertex data inside the GART buffer pool. */
1392 int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1393 int numverts = (int)prim->numverts;
1394 int nbox = sarea_priv->nbox;
1398 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1400 prim->vc_format, prim->start, prim->finish, prim->numverts);
1402 if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1403 DRM_ERROR("bad prim %x numverts %d\n",
1404 prim->prim, prim->numverts);
1409 /* Emit the next cliprect */
1411 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1414 /* Emit the vertex buffer rendering commands */
1417 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1420 OUT_RING(prim->vc_format);
1421 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1422 RADEON_COLOR_ORDER_RGBA |
1423 RADEON_VTX_FMT_RADEON_MODE |
1424 (numverts << RADEON_NUM_VERTICES_SHIFT));
/* Retire a DMA buffer: stamp it with a new dispatch age and emit that
 * age to the ring so the freelist can tell when the GPU is done with it.
 */
1432 static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
1434 drm_radeon_private_t *dev_priv = dev->dev_private;
1435 drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1438 buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1440 /* Emit the vertex buffer age */
1442 RADEON_DISPATCH_AGE(buf_priv->age);
/* Point the CP's indirect-buffer registers at [start, end) of a DMA
 * buffer so the engine executes the commands stored there.
 */
1449 static void radeon_cp_dispatch_indirect(drm_device_t * dev,
1450 drm_buf_t * buf, int start, int end)
1452 drm_radeon_private_t *dev_priv = dev->dev_private;
1454 DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1457 int offset = (dev_priv->gart_buffers_offset
1458 + buf->offset + start);
/* Round the byte range up to whole 32-bit dwords. */
1459 int dwords = (end - start + 3) / sizeof(u32);
1461 /* Indirect buffer data must be an even number of
1462 * dwords, so if we've been given an odd number we must
1463 * pad the data with a Type-2 CP packet.
1467 ((char *)dev->agp_buffer_map->handle
1468 + buf->offset + start);
1469 data[dwords++] = RADEON_CP_PACKET2;
1472 /* Fire off the indirect buffer */
1475 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
/* Emit an indexed draw (PRIM_WALK_IND): patch a RNDR_GEN_INDX_PRIM
 * header directly into the element buffer, then dispatch it once per
 * cliprect via radeon_cp_dispatch_indirect().
 */
1483 static void radeon_cp_dispatch_indices(drm_device_t * dev,
1484 drm_buf_t * elt_buf,
1485 drm_radeon_tcl_prim_t * prim)
1487 drm_radeon_private_t *dev_priv = dev->dev_private;
1488 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1489 int offset = dev_priv->gart_buffers_offset + prim->offset;
/* Indices start after the per-primitive header; they are u16. */
1493 int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1494 int count = (prim->finish - start) / sizeof(u16);
1495 int nbox = sarea_priv->nbox;
1497 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1500 prim->start, prim->finish, prim->offset, prim->numverts);
1502 if (bad_prim_vertex_nr(prim->prim, count)) {
1503 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
/* Reject empty ranges and headers not 8-byte aligned. */
1507 if (start >= prim->finish || (prim->start & 0x7)) {
1508 DRM_ERROR("buffer prim %d\n", prim->prim);
1512 dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1514 data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1515 elt_buf->offset + prim->start);
/* In-place command header ahead of the index data. */
1517 data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1519 data[2] = prim->numverts;
1520 data[3] = prim->vc_format;
1521 data[4] = (prim->prim |
1522 RADEON_PRIM_WALK_IND |
1523 RADEON_COLOR_ORDER_RGBA |
1524 RADEON_VTX_FMT_RADEON_MODE |
1525 (count << RADEON_NUM_VERTICES_SHIFT));
1529 radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1531 radeon_cp_dispatch_indirect(dev, elt_buf,
1532 prim->start, prim->finish);
1539 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
/* Upload a texture image from user space: copy scanlines into a DMA
 * buffer (handling micro-tiled layouts and sub-64-byte pitches by
 * hand) and blit them to the destination offset, splitting the image
 * into multiple passes when it exceeds RADEON_MAX_TEXTURE_SIZE.
 * Returns 0 on success or a DRM_ERR code; on EAGAIN the updated
 * image struct is copied back so user space can retry the remainder.
 */
1541 static int radeon_cp_dispatch_texture(DRMFILE filp,
1543 drm_radeon_texture_t * tex,
1544 drm_radeon_tex_image_t * image)
1546 drm_radeon_private_t *dev_priv = dev->dev_private;
1547 drm_file_t *filp_priv;
1551 const u8 __user *data;
1552 int size, dwords, tex_width, blit_width, spitch;
1555 u32 texpitch, microtile;
1559 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
/* Destination offset comes from user space; validate/translate it. */
1561 if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
1562 DRM_ERROR("Invalid destination offset\n");
1563 return DRM_ERR(EINVAL);
1566 dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1568 /* Flush the pixel cache. This ensures no pixel data gets mixed
1569 * up with the texture data from the host data blit, otherwise
1570 * part of the texture image may be corrupted.
1573 RADEON_FLUSH_CACHE();
1574 RADEON_WAIT_UNTIL_IDLE();
1577 /* The compiler won't optimize away a division by a variable,
1578 * even if the only legal values are powers of two. Thus, we'll
1579 * use a shift instead.
/* Map texture format to blit color format and bytes-per-row. */
1581 switch (tex->format) {
1582 case RADEON_TXFORMAT_ARGB8888:
1583 case RADEON_TXFORMAT_RGBA8888:
1584 format = RADEON_COLOR_FORMAT_ARGB8888;
1585 tex_width = tex->width * 4;
1586 blit_width = image->width * 4;
1588 case RADEON_TXFORMAT_AI88:
1589 case RADEON_TXFORMAT_ARGB1555:
1590 case RADEON_TXFORMAT_RGB565:
1591 case RADEON_TXFORMAT_ARGB4444:
1592 case RADEON_TXFORMAT_VYUY422:
1593 case RADEON_TXFORMAT_YVYU422:
1594 format = RADEON_COLOR_FORMAT_RGB565;
1595 tex_width = tex->width * 2;
1596 blit_width = image->width * 2;
1598 case RADEON_TXFORMAT_I8:
1599 case RADEON_TXFORMAT_RGB332:
1600 format = RADEON_COLOR_FORMAT_CI8;
1601 tex_width = tex->width * 1;
1602 blit_width = image->width * 1;
1605 DRM_ERROR("invalid texture format %d\n", tex->format);
1606 return DRM_ERR(EINVAL);
/* Source pitch in 64-byte units; zero pitch only valid for 1 row. */
1608 spitch = blit_width >> 6;
1609 if (spitch == 0 && image->height > 1)
1610 return DRM_ERR(EINVAL);
1612 texpitch = tex->pitch;
1613 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1615 if (tex_width < 64) {
1616 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1617 /* we got tiled coordinates, untile them */
1623 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1626 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1627 tex->offset >> 10, tex->pitch, tex->format,
1628 image->x, image->y, image->width, image->height);
1630 /* Make a copy of some parameters in case we have to
1631 * update them for a multi-pass texture blit.
1633 height = image->height;
1634 data = (const u8 __user *)image->data;
1636 size = height * blit_width;
/* Clamp each pass to what fits in one DMA buffer. */
1638 if (size > RADEON_MAX_TEXTURE_SIZE) {
1639 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1640 size = height * blit_width;
1641 } else if (size < 4 && size > 0) {
1643 } else if (size == 0) {
1647 buf = radeon_freelist_get(dev);
/* No free buffer: idle the CP and retry once before EAGAIN. */
1649 radeon_do_cp_idle(dev_priv);
1650 buf = radeon_freelist_get(dev);
1653 DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1654 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1655 return DRM_ERR(EFAULT);
1656 return DRM_ERR(EAGAIN);
1659 /* Dispatch the indirect buffer.
1662 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
/* Copy-from-user helper; bails out with EFAULT on failure. */
1665 #define RADEON_COPY_MT(_buf, _data, _width) \
1667 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1668 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1669 return DRM_ERR(EFAULT); \
1674 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1675 however, we cannot use blitter directly for texture width < 64 bytes,
1676 since minimum tex pitch is 64 bytes and we need this to match
1677 the texture width, otherwise the blitter will tile it wrong.
1678 Thus, tiling manually in this case. Additionally, need to special
1679 case tex height = 1, since our actual image will have height 2
1680 and we need to ensure we don't read beyond the texture size
1682 if (tex->height == 1) {
1683 if (tex_width >= 64 || tex_width <= 16) {
1684 RADEON_COPY_MT(buffer, data,
1685 (int)(tex_width * sizeof(u32)));
1686 } else if (tex_width == 32) {
1687 RADEON_COPY_MT(buffer, data, 16);
1688 RADEON_COPY_MT(buffer + 8,
1691 } else if (tex_width >= 64 || tex_width == 16) {
1692 RADEON_COPY_MT(buffer, data,
1693 (int)(dwords * sizeof(u32)));
1694 } else if (tex_width < 16) {
1695 for (i = 0; i < tex->height; i++) {
1696 RADEON_COPY_MT(buffer, data, tex_width);
1700 } else if (tex_width == 32) {
1701 /* TODO: make sure this works when not fitting in one buffer
1702 (i.e. 32bytes x 2048...) */
/* Manually interleave two rows per 64-byte tile line. */
1703 for (i = 0; i < tex->height; i += 2) {
1704 RADEON_COPY_MT(buffer, data, 16);
1706 RADEON_COPY_MT(buffer + 8, data, 16);
1708 RADEON_COPY_MT(buffer + 4, data, 16);
1710 RADEON_COPY_MT(buffer + 12, data, 16);
1716 if (tex_width >= 32) {
1717 /* Texture image width is larger than the minimum, so we
1718 * can upload it directly.
1720 RADEON_COPY_MT(buffer, data,
1721 (int)(dwords * sizeof(u32)));
1723 /* Texture image width is less than the minimum, so we
1724 * need to pad out each image scanline to the minimum
1727 for (i = 0; i < tex->height; i++) {
1728 RADEON_COPY_MT(buffer, data, tex_width);
1735 #undef RADEON_COPY_MT
1738 offset = dev_priv->gart_buffers_offset + buf->offset;
/* Blit this pass from the staging buffer to the texture offset. */
1740 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1741 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1742 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1743 RADEON_GMC_BRUSH_NONE |
1745 RADEON_GMC_SRC_DATATYPE_COLOR |
1747 RADEON_DP_SRC_SOURCE_MEMORY |
1748 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1749 OUT_RING((spitch << 22) | (offset >> 10));
1750 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1752 OUT_RING((image->x << 16) | image->y);
1753 OUT_RING((image->width << 16) | height);
1754 RADEON_WAIT_UNTIL_2D_IDLE();
1757 radeon_cp_discard_buffer(dev, buf);
1759 /* Update the input parameters for next time */
1761 image->height -= height;
1762 image->data = (const u8 __user *)image->data + size;
1763 } while (image->height > 0);
1765 /* Flush the pixel cache after the blit completes. This ensures
1766 * the texture data is written out to memory before rendering
1770 RADEON_FLUSH_CACHE();
1771 RADEON_WAIT_UNTIL_2D_IDLE();
/* Load the 32x32 polygon stipple pattern: reset the stipple address
 * to 0, then stream all 32 pattern dwords into RE_STIPPLE_DATA.
 */
1776 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1778 drm_radeon_private_t *dev_priv = dev->dev_private;
1785 OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1786 OUT_RING(0x00000000);
1788 OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1789 for (i = 0; i < 32; i++) {
1790 OUT_RING(stipple[i]);
/* Write one surface's flags/lower/upper bounds to the SURFACE0-based
 * register bank (16 bytes per surface).  Idles the CP first; a no-op
 * when the MMIO mapping is not yet set up.
 */
1796 static void radeon_apply_surface_regs(int surf_index,
1797 drm_radeon_private_t *dev_priv)
1799 if (!dev_priv->mmio)
1802 radeon_do_cp_idle(dev_priv);
1804 RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1805 dev_priv->surfaces[surf_index].flags);
1806 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1807 dev_priv->surfaces[surf_index].lower);
1808 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1809 dev_priv->surfaces[surf_index].upper);
1812 /* Allocates a virtual surface
1813 * doesn't always allocate a real surface, will stretch an existing
1814 * surface when possible.
1816 * Note that refcount can be at most 2, since during a free refcount=3
1817 * might mean we have to allocate a new surface which might not always
1819 * For example: we allocate three contiguous surfaces ABC. If B is
1820 * freed, we suddenly need two surfaces to store A and C, which might
1821 * not always be available.
/* Allocate a virtual surface for [new->address, +new->size).  Tries
 * to stretch an existing hardware surface with matching flags when
 * the new range is adjacent; otherwise claims a free hardware slot.
 * Returns the virtual surface index, or -1 (via the elided failure
 * paths) on bad parameters, overlap, or exhaustion.
 */
1823 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1824 drm_radeon_private_t *dev_priv, DRMFILE filp)
1826 struct radeon_virt_surface *s;
1828 int virt_surface_index;
1829 uint32_t new_upper, new_lower;
1831 new_lower = new->address;
1832 new_upper = new_lower + new->size - 1;
/* Sanity: non-empty, flags set, and both bounds on the hardware's
 * fixed alignment grid. */
1835 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1836 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1837 RADEON_SURF_ADDRESS_FIXED_MASK)
1838 || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1841 /* make sure there is no overlap with existing surfaces */
1842 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1843 if ((dev_priv->surfaces[i].refcount != 0) &&
1844 (((new_lower >= dev_priv->surfaces[i].lower) &&
1845 (new_lower < dev_priv->surfaces[i].upper)) ||
1846 ((new_lower < dev_priv->surfaces[i].lower) &&
1847 (new_upper > dev_priv->surfaces[i].lower)))) {
1852 /* find a virtual surface */
1853 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1854 if (dev_priv->virt_surfaces[i].filp == 0)
1856 if (i == 2 * RADEON_MAX_SURFACES) {
1859 virt_surface_index = i;
1861 /* try to reuse an existing surface */
1862 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
/* Extend downward: new range ends right below surface i. */
1864 if ((dev_priv->surfaces[i].refcount == 1) &&
1865 (new->flags == dev_priv->surfaces[i].flags) &&
1866 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1867 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1868 s->surface_index = i;
1869 s->lower = new_lower;
1870 s->upper = new_upper;
1871 s->flags = new->flags;
1873 dev_priv->surfaces[i].refcount++;
1874 dev_priv->surfaces[i].lower = s->lower;
1875 radeon_apply_surface_regs(s->surface_index, dev_priv);
1876 return virt_surface_index;
/* Extend upward: new range starts right above surface i. */
1880 if ((dev_priv->surfaces[i].refcount == 1) &&
1881 (new->flags == dev_priv->surfaces[i].flags) &&
1882 (new_lower == dev_priv->surfaces[i].upper + 1)) {
1883 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1884 s->surface_index = i;
1885 s->lower = new_lower;
1886 s->upper = new_upper;
1887 s->flags = new->flags;
1889 dev_priv->surfaces[i].refcount++;
1890 dev_priv->surfaces[i].upper = s->upper;
1891 radeon_apply_surface_regs(s->surface_index, dev_priv);
1892 return virt_surface_index;
1896 /* okay, we need a new one */
1897 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1898 if (dev_priv->surfaces[i].refcount == 0) {
1899 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1900 s->surface_index = i;
1901 s->lower = new_lower;
1902 s->upper = new_upper;
1903 s->flags = new->flags;
1905 dev_priv->surfaces[i].refcount = 1;
1906 dev_priv->surfaces[i].lower = s->lower;
1907 dev_priv->surfaces[i].upper = s->upper;
1908 dev_priv->surfaces[i].flags = s->flags;
1909 radeon_apply_surface_regs(s->surface_index, dev_priv);
1910 return virt_surface_index;
1914 /* we didn't find anything */
/* Release the virtual surface owned by filp whose lower bound matches,
 * shrinking or freeing the backing hardware surface as its refcount
 * drops.  NOTE(review): the field accesses below are split across
 * elided lines, so the exact shrink logic is not fully visible here.
 */
1918 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1921 struct radeon_virt_surface *s;
1923 /* find the virtual surface */
1924 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1925 s = &(dev_priv->virt_surfaces[i]);
1927 if ((lower == s->lower) && (filp == s->filp)) {
1928 if (dev_priv->surfaces[s->surface_index].
1930 dev_priv->surfaces[s->surface_index].
1933 if (dev_priv->surfaces[s->surface_index].
1935 dev_priv->surfaces[s->surface_index].
1938 dev_priv->surfaces[s->surface_index].refcount--;
1939 if (dev_priv->surfaces[s->surface_index].
1941 dev_priv->surfaces[s->surface_index].
1944 radeon_apply_surface_regs(s->surface_index,
/* Free every virtual surface still owned by a departing client; used
 * on file release so no surfaces leak across client lifetimes.
 */
1953 static void radeon_surfaces_release(DRMFILE filp,
1954 drm_radeon_private_t * dev_priv)
1957 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1958 if (dev_priv->virt_surfaces[i].filp == filp)
1959 free_surface(filp, dev_priv,
1960 dev_priv->virt_surfaces[i].lower);
1964 /* ================================================================
/* ioctl: allocate a surface on behalf of the caller.  Copies the
 * request from user space and hands it to alloc_surface(); EINVAL
 * when the driver is uninitialized or allocation fails.
 */
1967 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1970 drm_radeon_private_t *dev_priv = dev->dev_private;
1971 drm_radeon_surface_alloc_t alloc;
1974 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1975 return DRM_ERR(EINVAL);
1978 DRM_COPY_FROM_USER_IOCTL(alloc,
1979 (drm_radeon_surface_alloc_t __user *) data,
1982 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1983 return DRM_ERR(EINVAL);
/* ioctl: free a surface previously allocated by this client.
 */
1988 static int radeon_surface_free(DRM_IOCTL_ARGS)
1991 drm_radeon_private_t *dev_priv = dev->dev_private;
1992 drm_radeon_surface_free_t memfree;
1995 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1996 return DRM_ERR(EINVAL);
/* NOTE(review): the user-pointer cast says drm_radeon_mem_free_t while
 * the destination is drm_radeon_surface_free_t — harmless only if the
 * two layouts agree through the copied size; confirm and fix the cast. */
1999 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *) data,
2002 if (free_surface(filp, dev_priv, memfree.address))
2003 return DRM_ERR(EINVAL);
/* ioctl: clear color/depth/stencil buffers.  Clamps the cliprect
 * count to RADEON_NR_SAREA_CLIPRECTS, copies the per-box depth clear
 * rects from user space, and dispatches the clear.
 */
2008 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2011 drm_radeon_private_t *dev_priv = dev->dev_private;
2012 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2013 drm_radeon_clear_t clear;
2014 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2017 LOCK_TEST_WITH_RETURN(dev, filp);
2019 DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2022 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2024 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2025 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2027 if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2028 sarea_priv->nbox * sizeof(depth_boxes[0])))
2029 return DRM_ERR(EFAULT);
2031 radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2037 /* Not sure why this isn't set all the time:
/* Enable page flipping: set OFFSET_FLIP_CNTL on both CRTC offset
 * control registers and initialize the page-tracking state to page 0.
 */
2039 static int radeon_do_init_pageflip(drm_device_t * dev)
2041 drm_radeon_private_t *dev_priv = dev->dev_private;
2047 RADEON_WAIT_UNTIL_3D_IDLE();
2048 OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2049 OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2050 RADEON_CRTC_OFFSET_FLIP_CNTL);
2051 OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2052 OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2053 RADEON_CRTC_OFFSET_FLIP_CNTL);
2056 dev_priv->page_flipping = 1;
2057 dev_priv->current_page = 0;
2058 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2063 /* Called whenever a client dies, from drm_release.
2064 * NOTE: Lock isn't necessarily held when this is called!
2066 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2068 drm_radeon_private_t *dev_priv = dev->dev_private;
/* Flip back to page 0 if the dying client left page 1 displayed. */
2071 if (dev_priv->current_page != 0)
2072 radeon_cp_dispatch_flip(dev);
2074 dev_priv->page_flipping = 0;
2078 /* Swapping and flipping are different operations, need different ioctls.
2079 * They can & should be intermixed to support multiple 3d windows.
2081 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2084 drm_radeon_private_t *dev_priv = dev->dev_private;
2087 LOCK_TEST_WITH_RETURN(dev, filp);
2089 RING_SPACE_TEST_WITH_RETURN(dev_priv);
/* Lazily enable page flipping on the first flip request. */
2091 if (!dev_priv->page_flipping)
2092 radeon_do_init_pageflip(dev);
2094 radeon_cp_dispatch_flip(dev);
/* ioctl: blit-based buffer swap.  Clamps the cliprect count, performs
 * the swap, and clears ctx_owner so 3D state is re-emitted next time.
 */
2100 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2103 drm_radeon_private_t *dev_priv = dev->dev_private;
2104 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2107 LOCK_TEST_WITH_RETURN(dev, filp);
2109 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2111 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2112 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2114 radeon_cp_dispatch_swap(dev);
2115 dev_priv->sarea_priv->ctx_owner = 0;
/* ioctl: render a vertex buffer.  Validates the buffer index, owner
 * and primitive type, emits any dirty SAREA state, then dispatches
 * the draw and optionally discards the buffer.
 */
2121 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2124 drm_radeon_private_t *dev_priv = dev->dev_private;
2125 drm_file_t *filp_priv;
2126 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2127 drm_device_dma_t *dma = dev->dma;
2129 drm_radeon_vertex_t vertex;
2130 drm_radeon_tcl_prim_t prim;
2132 LOCK_TEST_WITH_RETURN(dev, filp);
2135 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2136 return DRM_ERR(EINVAL);
2139 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2141 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2144 DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2145 DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2147 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2148 DRM_ERROR("buffer index %d (of %d max)\n",
2149 vertex.idx, dma->buf_count - 1);
2150 return DRM_ERR(EINVAL);
2152 if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2153 DRM_ERROR("buffer prim %d\n", vertex.prim);
2154 return DRM_ERR(EINVAL);
2157 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2158 VB_AGE_TEST_WITH_RETURN(dev_priv);
2160 buf = dma->buflist[vertex.idx];
/* Only the owner of the buffer may submit it. */
2162 if (buf->filp != filp) {
2163 DRM_ERROR("process %d using buffer owned by %p\n",
2164 DRM_CURRENTPID, buf->filp);
2165 return DRM_ERR(EINVAL);
2168 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2169 return DRM_ERR(EINVAL);
2172 /* Build up a prim_t record:
2175 buf->used = vertex.count; /* not used? */
/* Flush dirty client state (except cliprects) before drawing. */
2177 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2178 if (radeon_emit_state(dev_priv, filp_priv,
2179 &sarea_priv->context_state,
2180 sarea_priv->tex_state,
2181 sarea_priv->dirty)) {
2182 DRM_ERROR("radeon_emit_state failed\n");
2183 return DRM_ERR(EINVAL);
2186 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2187 RADEON_UPLOAD_TEX1IMAGES |
2188 RADEON_UPLOAD_TEX2IMAGES |
2189 RADEON_REQUIRE_QUIESCENCE);
2193 prim.finish = vertex.count; /* unused */
2194 prim.prim = vertex.prim;
2195 prim.numverts = vertex.count;
2196 prim.vc_format = dev_priv->sarea_priv->vc_format;
2198 radeon_cp_dispatch_vertex(dev, buf, &prim);
2201 if (vertex.discard) {
2202 radeon_cp_discard_buffer(dev, buf);
/* ioctl: render an indexed primitive from an element buffer.
 * Validates index range/alignment and ownership, emits dirty state,
 * then dispatches via radeon_cp_dispatch_indices().
 */
2209 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2212 drm_radeon_private_t *dev_priv = dev->dev_private;
2213 drm_file_t *filp_priv;
2214 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2215 drm_device_dma_t *dma = dev->dma;
2217 drm_radeon_indices_t elts;
2218 drm_radeon_tcl_prim_t prim;
2221 LOCK_TEST_WITH_RETURN(dev, filp);
2224 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2225 return DRM_ERR(EINVAL);
2228 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2230 DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2233 DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2234 DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2236 if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2237 DRM_ERROR("buffer index %d (of %d max)\n",
2238 elts.idx, dma->buf_count - 1);
2239 return DRM_ERR(EINVAL);
2241 if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2242 DRM_ERROR("buffer prim %d\n", elts.prim);
2243 return DRM_ERR(EINVAL);
2246 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2247 VB_AGE_TEST_WITH_RETURN(dev_priv);
2249 buf = dma->buflist[elts.idx];
2251 if (buf->filp != filp) {
2252 DRM_ERROR("process %d using buffer owned by %p\n",
2253 DRM_CURRENTPID, buf->filp);
2254 return DRM_ERR(EINVAL);
2257 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2258 return DRM_ERR(EINVAL);
/* u16 indices; shift start back to include the primitive header. */
2261 count = (elts.end - elts.start) / sizeof(u16);
2262 elts.start -= RADEON_INDEX_PRIM_OFFSET;
2264 if (elts.start & 0x7) {
2265 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2266 return DRM_ERR(EINVAL);
2268 if (elts.start < buf->used) {
2269 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2270 return DRM_ERR(EINVAL);
2273 buf->used = elts.end;
2275 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2276 if (radeon_emit_state(dev_priv, filp_priv,
2277 &sarea_priv->context_state,
2278 sarea_priv->tex_state,
2279 sarea_priv->dirty)) {
2280 DRM_ERROR("radeon_emit_state failed\n");
2281 return DRM_ERR(EINVAL);
2284 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2285 RADEON_UPLOAD_TEX1IMAGES |
2286 RADEON_UPLOAD_TEX2IMAGES |
2287 RADEON_REQUIRE_QUIESCENCE);
2290 /* Build up a prim_t record:
2292 prim.start = elts.start;
2293 prim.finish = elts.end;
2294 prim.prim = elts.prim;
2295 prim.offset = 0; /* offset from start of dma buffers */
2296 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2297 prim.vc_format = dev_priv->sarea_priv->vc_format;
2299 radeon_cp_dispatch_indices(dev, buf, &prim);
2301 radeon_cp_discard_buffer(dev, buf);
/* ioctl: texture upload.  Copies the request and image descriptor
 * from user space, then delegates to radeon_cp_dispatch_texture().
 */
2308 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2311 drm_radeon_private_t *dev_priv = dev->dev_private;
2312 drm_radeon_texture_t tex;
2313 drm_radeon_tex_image_t image;
2316 LOCK_TEST_WITH_RETURN(dev, filp);
2318 DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2321 if (tex.image == NULL) {
2322 DRM_ERROR("null texture image!\n");
2323 return DRM_ERR(EINVAL);
2326 if (DRM_COPY_FROM_USER(&image,
2327 (drm_radeon_tex_image_t __user *) tex.image,
2329 return DRM_ERR(EFAULT);
2331 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2332 VB_AGE_TEST_WITH_RETURN(dev_priv);
2334 ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
/* ioctl: set the polygon stipple pattern.  Copies the 32-dword mask
 * from user space and programs it via radeon_cp_dispatch_stipple().
 */
2340 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2343 drm_radeon_private_t *dev_priv = dev->dev_private;
2344 drm_radeon_stipple_t stipple;
2347 LOCK_TEST_WITH_RETURN(dev, filp);
2349 DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2352 if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2353 return DRM_ERR(EFAULT);
2355 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2357 radeon_cp_dispatch_stipple(dev, mask);
/* ioctl: execute a raw command buffer (privileged/X-server path).
 * Validates buffer index, ownership, and start offset, idles the 3D
 * engine, then fires the indirect buffer unverified.
 */
2363 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2366 drm_radeon_private_t *dev_priv = dev->dev_private;
2367 drm_device_dma_t *dma = dev->dma;
2369 drm_radeon_indirect_t indirect;
2372 LOCK_TEST_WITH_RETURN(dev, filp);
2375 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2376 return DRM_ERR(EINVAL);
2379 DRM_COPY_FROM_USER_IOCTL(indirect,
2380 (drm_radeon_indirect_t __user *) data,
2383 DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2384 indirect.idx, indirect.start, indirect.end, indirect.discard);
2386 if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2387 DRM_ERROR("buffer index %d (of %d max)\n",
2388 indirect.idx, dma->buf_count - 1);
2389 return DRM_ERR(EINVAL);
2392 buf = dma->buflist[indirect.idx];
2394 if (buf->filp != filp) {
2395 DRM_ERROR("process %d using buffer owned by %p\n",
2396 DRM_CURRENTPID, buf->filp);
2397 return DRM_ERR(EINVAL);
2400 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2401 return DRM_ERR(EINVAL);
/* Reject ranges that rewind into already-submitted data. */
2404 if (indirect.start < buf->used) {
2405 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2406 indirect.start, buf->used);
2407 return DRM_ERR(EINVAL);
2410 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2411 VB_AGE_TEST_WITH_RETURN(dev_priv);
2413 buf->used = indirect.end;
2415 /* Wait for the 3D stream to idle before the indirect buffer
2416 * containing 2D acceleration commands is processed.
2420 RADEON_WAIT_UNTIL_3D_IDLE();
2424 /* Dispatch the indirect buffer full of commands from the
2425 * X server. This is insecure and is thus only available to
2426 * privileged clients.
2428 radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2429 if (indirect.discard) {
2430 radeon_cp_discard_buffer(dev, buf);
/* Ioctl: newer multi-primitive vertex submission path.
 * Iterates over `vertex.nr_prims` primitive records; when a record
 * references a new state index, the state block is copied in and
 * emitted first.  Each primitive is dispatched either as indexed
 * (RADEON_PRIM_WALK_IND) or as a plain vertex run.
 * NOTE(review): extract is line-sampled — `buf`, `i`, and several
 * closing braces are not visible; verify against the full source.
 */
2437 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2440 drm_radeon_private_t *dev_priv = dev->dev_private;
2441 drm_file_t *filp_priv;
2442 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2443 drm_device_dma_t *dma = dev->dma;
2445 drm_radeon_vertex2_t vertex;
/* Tracks the last-emitted state index so identical consecutive
 * states are emitted only once (initialized to 0xff = "none"). */
2447 unsigned char laststate;
2449 LOCK_TEST_WITH_RETURN(dev, filp);
2452 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2453 return DRM_ERR(EINVAL);
2456 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2458 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2461 DRM_DEBUG("pid=%d index=%d discard=%d\n",
2462 DRM_CURRENTPID, vertex.idx, vertex.discard);
/* Validate the user-supplied buffer index. */
2464 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2465 DRM_ERROR("buffer index %d (of %d max)\n",
2466 vertex.idx, dma->buf_count - 1);
2467 return DRM_ERR(EINVAL);
2470 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2471 VB_AGE_TEST_WITH_RETURN(dev_priv);
2473 buf = dma->buflist[vertex.idx];
/* Ownership and pending checks mirror radeon_cp_indirect(). */
2475 if (buf->filp != filp) {
2476 DRM_ERROR("process %d using buffer owned by %p\n",
2477 DRM_CURRENTPID, buf->filp);
2478 return DRM_ERR(EINVAL);
2482 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2483 return DRM_ERR(EINVAL);
/* Cliprect count comes from the shared area; bound it. */
2486 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2487 return DRM_ERR(EINVAL);
2489 for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2490 drm_radeon_prim_t prim;
2491 drm_radeon_tcl_prim_t tclprim;
2493 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2494 return DRM_ERR(EFAULT);
/* Emit a new state block only when the state index changes. */
2496 if (prim.stateidx != laststate) {
2497 drm_radeon_state_t state;
2499 if (DRM_COPY_FROM_USER(&state,
2500 &vertex.state[prim.stateidx],
2502 return DRM_ERR(EFAULT);
2504 if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2505 DRM_ERROR("radeon_emit_state2 failed\n");
2506 return DRM_ERR(EINVAL);
2509 laststate = prim.stateidx;
/* Translate the user primitive record into a TCL primitive. */
2512 tclprim.start = prim.start;
2513 tclprim.finish = prim.finish;
2514 tclprim.prim = prim.prim;
2515 tclprim.vc_format = prim.vc_format;
2517 if (prim.prim & RADEON_PRIM_WALK_IND) {
/* Indexed path: indices live past the vertex data in the buffer. */
2518 tclprim.offset = prim.numverts * 64;
2519 tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2521 radeon_cp_dispatch_indices(dev, buf, &tclprim);
2523 tclprim.numverts = prim.numverts;
2524 tclprim.offset = 0; /* not used */
2526 radeon_cp_dispatch_vertex(dev, buf, &tclprim);
/* A single cliprect is consumed by the first primitive. */
2529 if (sarea_priv->nbox == 1)
2530 sarea_priv->nbox = 0;
2533 if (vertex.discard) {
2534 radeon_cp_discard_buffer(dev, buf);
/* Emit one register-write packet from the client command stream.
 * Looks up the packet's register range by id in the `packet[]` table,
 * bounds-checks the payload against the remaining command buffer,
 * runs the fixup/verification pass, then writes a CP type-0 packet
 * to the ring and advances the command-buffer cursor.
 * NOTE(review): extract is line-sampled — `sz`/`reg` declarations,
 * BEGIN_RING/ADVANCE_RING, and the return are not visible here.
 */
2541 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2542 drm_file_t * filp_priv,
2543 drm_radeon_cmd_header_t header,
2544 drm_radeon_kcmd_buffer_t *cmdbuf)
2546 int id = (int)header.packet.packet_id;
2548 int *data = (int *)cmdbuf->buf;
/* Reject unknown packet ids (table lookup below would be OOB). */
2551 if (id >= RADEON_MAX_STATE_PACKETS)
2552 return DRM_ERR(EINVAL);
2554 sz = packet[id].len;
2555 reg = packet[id].start;
/* The client must have supplied at least sz dwords of payload. */
2557 if (sz * sizeof(int) > cmdbuf->bufsz) {
2558 DRM_ERROR("Packet size provided larger than data provided\n");
2559 return DRM_ERR(EINVAL);
/* Rewrites client-relative offsets to card addresses and rejects
 * anything out of bounds. */
2562 if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2563 DRM_ERROR("Packet verification failed\n");
2564 return DRM_ERR(EINVAL);
2568 OUT_RING(CP_PACKET0(reg, (sz - 1)));
2569 OUT_RING_TABLE(data, sz);
/* Consume the payload from the in-kernel command buffer. */
2572 cmdbuf->buf += sz * sizeof(int);
2573 cmdbuf->bufsz -= sz * sizeof(int);
/* Emit `count` TCL scalar values starting at `offset` with the given
 * stride: one write to the scalar index register, then a streamed
 * table write to the scalar data register.
 * NOTE(review): extract is line-sampled — BEGIN_RING/ADVANCE_RING
 * and the return statement are not visible here.
 */
2577 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2578 drm_radeon_cmd_header_t header,
2579 drm_radeon_kcmd_buffer_t *cmdbuf)
2581 int sz = header.scalars.count;
2582 int start = header.scalars.offset;
2583 int stride = header.scalars.stride;
2587 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2588 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2589 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2590 OUT_RING_TABLE(cmdbuf->buf, sz);
/* Advance past the consumed scalar payload. */
2592 cmdbuf->buf += sz * sizeof(int);
2593 cmdbuf->bufsz -= sz * sizeof(int);
/* Variant of radeon_emit_scalars() that biases the scalar offset by
 * 0x100.  The only difference from radeon_emit_scalars() is the
 * `+ 0x100` on `start`; presumably this addresses an upper bank of
 * scalar registers the 8-bit header offset field cannot reach —
 * TODO confirm against the hardware documentation.
 * NOTE(review): extract is line-sampled — BEGIN_RING/ADVANCE_RING
 * and the return statement are not visible here.
 */
2599 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2600 drm_radeon_cmd_header_t header,
2601 drm_radeon_kcmd_buffer_t *cmdbuf)
2603 int sz = header.scalars.count;
2604 int start = ((unsigned int)header.scalars.offset) + 0x100;
2605 int stride = header.scalars.stride;
2609 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2610 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2611 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2612 OUT_RING_TABLE(cmdbuf->buf, sz);
/* Advance past the consumed scalar payload. */
2614 cmdbuf->buf += sz * sizeof(int);
2615 cmdbuf->bufsz -= sz * sizeof(int);
/* Emit `count` TCL vector values: program the vector index register
 * with start/stride, then stream the payload into the vector data
 * register.  Mirrors radeon_emit_scalars() for the vector bank.
 * NOTE(review): extract is line-sampled — BEGIN_RING/ADVANCE_RING
 * and the return statement are not visible here.
 */
2619 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2620 drm_radeon_cmd_header_t header,
2621 drm_radeon_kcmd_buffer_t *cmdbuf)
2623 int sz = header.vectors.count;
2624 int start = header.vectors.offset;
2625 int stride = header.vectors.stride;
2629 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2630 OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2631 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2632 OUT_RING_TABLE(cmdbuf->buf, sz);
/* Advance past the consumed vector payload. */
2635 cmdbuf->buf += sz * sizeof(int);
2636 cmdbuf->bufsz -= sz * sizeof(int);
/* Emit one CP type-3 packet from the client command stream after
 * verification.  radeon_check_and_fixup_packet3() validates the
 * packet and reports its size (`cmdsz`, declared outside this
 * extract); the verified dwords are then streamed to the ring.
 * NOTE(review): extract is line-sampled — cmdsz/ret declarations,
 * BEGIN_RING/ADVANCE_RING, and the return are not visible here.
 */
2640 static int radeon_emit_packet3(drm_device_t * dev,
2641 drm_file_t * filp_priv,
2642 drm_radeon_kcmd_buffer_t *cmdbuf)
2644 drm_radeon_private_t *dev_priv = dev->dev_private;
2651 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2653 DRM_ERROR("Packet verification failed\n");
2658 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
/* cmdsz is in dwords; advance the byte cursor accordingly. */
2661 cmdbuf->buf += cmdsz * 4;
2662 cmdbuf->bufsz -= cmdsz * 4;
/* Emit one verified CP type-3 packet once per cliprect.  For each
 * box copied from user space, the scissor state is set via
 * radeon_emit_clip_rect() before re-emitting the same packet, so the
 * packet's effect is clipped to every box in turn.
 * NOTE(review): extract is line-sampled — cmdsz/ret/i declarations,
 * the do/while opening, BEGIN_RING/ADVANCE_RING, and the nbox==1
 * body are not visible here.
 */
2666 static int radeon_emit_packet3_cliprect(drm_device_t *dev,
2667 drm_file_t *filp_priv,
2668 drm_radeon_kcmd_buffer_t *cmdbuf,
2671 drm_radeon_private_t *dev_priv = dev->dev_private;
2672 drm_clip_rect_t box;
2675 drm_clip_rect_t __user *boxes = cmdbuf->boxes;
/* Validate and size the packet once, before the per-box loop. */
2681 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2683 DRM_ERROR("Packet verification failed\n");
2691 if (i < cmdbuf->nbox) {
2692 if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2693 return DRM_ERR(EFAULT);
2694 /* FIXME The second and subsequent times round
2695 * this loop, send a WAIT_UNTIL_3D_IDLE before
2696 * calling emit_clip_rect(). This fixes a
2697 * lockup on fast machines when sending
2698 * several cliprects with a cmdbuf, as when
2699 * waving a 2D window over a 3D
2700 * window. Something in the commands from user
2701 * space seems to hang the card when they're
2702 * sent several times in a row. That would be
2703 * the correct place to fix it but this works
2704 * around it until I can figure that out - Tim
2708 RADEON_WAIT_UNTIL_3D_IDLE();
2711 radeon_emit_clip_rect(dev_priv, &box);
/* Re-emit the same verified packet for this cliprect. */
2715 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2718 } while (++i < cmdbuf->nbox);
2719 if (cmdbuf->nbox == 1)
/* Consume the packet from the command buffer (dwords -> bytes). */
2723 cmdbuf->buf += cmdsz * 4;
2724 cmdbuf->bufsz -= cmdsz * 4;
/* Emit a wait-until-idle command for the engines selected by `flags`
 * (2D, 3D, or both); any other flag combination is rejected.
 * NOTE(review): extract is line-sampled — the switch statement line,
 * BEGIN_RING/ADVANCE_RING pairs, and break/return lines are not
 * visible here.
 */
2728 static int radeon_emit_wait(drm_device_t * dev, int flags)
2730 drm_radeon_private_t *dev_priv = dev->dev_private;
2733 DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2735 case RADEON_WAIT_2D:
2737 RADEON_WAIT_UNTIL_2D_IDLE();
2740 case RADEON_WAIT_3D:
2742 RADEON_WAIT_UNTIL_3D_IDLE();
2745 case RADEON_WAIT_2D | RADEON_WAIT_3D:
2747 RADEON_WAIT_UNTIL_IDLE();
/* Unknown wait flags: refuse rather than guess. */
2751 return DRM_ERR(EINVAL);
/* Ioctl: main secure command-buffer submission path.
 * Copies the whole client buffer into a kernel allocation (to close
 * check-vs-use races), then walks it header by header, dispatching
 * each command type to the matching verified emit helper.  R300-class
 * chips take a separate path via r300_do_cp_cmdbuf().
 * NOTE(review): extract is line-sampled — kbuf/temp/idx declarations,
 * the `err:` label, break statements, and several braces are not
 * visible here; verify against the full source.
 */
2757 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2760 drm_radeon_private_t *dev_priv = dev->dev_private;
2761 drm_file_t *filp_priv;
2762 drm_device_dma_t *dma = dev->dma;
2763 drm_buf_t *buf = NULL;
2765 drm_radeon_kcmd_buffer_t cmdbuf;
2766 drm_radeon_cmd_header_t header;
2767 int orig_nbox, orig_bufsz;
2770 LOCK_TEST_WITH_RETURN(dev, filp);
2773 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2774 return DRM_ERR(EINVAL);
2777 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2779 DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2780 (drm_radeon_cmd_buffer_t __user *) data,
2783 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2784 VB_AGE_TEST_WITH_RETURN(dev_priv);
/* Cap the submission at 64 KiB and reject negative sizes. */
2786 if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2787 return DRM_ERR(EINVAL);
2790 /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
2791 * races between checking values and using those values in other code,
2792 * and simply to avoid a lot of function calls to copy in data.
2794 orig_bufsz = cmdbuf.bufsz;
2795 if (orig_bufsz != 0) {
2796 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2798 return DRM_ERR(ENOMEM);
2799 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2801 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2802 return DRM_ERR(EFAULT);
2807 orig_nbox = cmdbuf.nbox;
/* R300+ microcode uses its own command-stream verifier. */
2809 if (dev_priv->microcode_version == UCODE_R300) {
2811 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2813 if (orig_bufsz != 0)
2814 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2819 /* microcode_version != r300 */
/* Main dispatch loop: one fixed-size header per command, followed by
 * a command-specific payload consumed by the emit helper. */
2820 while (cmdbuf.bufsz >= sizeof(header)) {
2822 header.i = *(int *)cmdbuf.buf;
2823 cmdbuf.buf += sizeof(header);
2824 cmdbuf.bufsz -= sizeof(header);
2826 switch (header.header.cmd_type) {
2827 case RADEON_CMD_PACKET:
2828 DRM_DEBUG("RADEON_CMD_PACKET\n");
2829 if (radeon_emit_packets
2830 (dev_priv, filp_priv, header, &cmdbuf)) {
2831 DRM_ERROR("radeon_emit_packets failed\n");
2836 case RADEON_CMD_SCALARS:
2837 DRM_DEBUG("RADEON_CMD_SCALARS\n");
2838 if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2839 DRM_ERROR("radeon_emit_scalars failed\n");
2844 case RADEON_CMD_VECTORS:
2845 DRM_DEBUG("RADEON_CMD_VECTORS\n");
2846 if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2847 DRM_ERROR("radeon_emit_vectors failed\n");
2852 case RADEON_CMD_DMA_DISCARD:
2853 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2854 idx = header.dma.buf_idx;
/* Index, ownership, and pending checks as in the other ioctls. */
2855 if (idx < 0 || idx >= dma->buf_count) {
2856 DRM_ERROR("buffer index %d (of %d max)\n",
2857 idx, dma->buf_count - 1);
2861 buf = dma->buflist[idx];
2862 if (buf->filp != filp || buf->pending) {
2863 DRM_ERROR("bad buffer %p %p %d\n",
2864 buf->filp, filp, buf->pending);
2868 radeon_cp_discard_buffer(dev, buf);
2871 case RADEON_CMD_PACKET3:
2872 DRM_DEBUG("RADEON_CMD_PACKET3\n");
2873 if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2874 DRM_ERROR("radeon_emit_packet3 failed\n");
2879 case RADEON_CMD_PACKET3_CLIP:
2880 DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2881 if (radeon_emit_packet3_cliprect
2882 (dev, filp_priv, &cmdbuf, orig_nbox)) {
2883 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2888 case RADEON_CMD_SCALARS2:
2889 DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2890 if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2891 DRM_ERROR("radeon_emit_scalars2 failed\n");
2896 case RADEON_CMD_WAIT:
2897 DRM_DEBUG("RADEON_CMD_WAIT\n");
2898 if (radeon_emit_wait(dev, header.wait.flags)) {
2899 DRM_ERROR("radeon_emit_wait failed\n");
2904 DRM_ERROR("bad cmd_type %d at %p\n",
2905 header.header.cmd_type,
2906 cmdbuf.buf - sizeof(header));
/* Success path: release the kernel copy and return. */
2911 if (orig_bufsz != 0)
2912 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2914 DRM_DEBUG("DONE\n");
/* Error path (jumped to from the cases above): free and fail. */
2919 if (orig_bufsz != 0)
2920 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2921 return DRM_ERR(EINVAL);
/* Ioctl: read back a driver/hardware parameter selected by
 * `param.param` and copy the int-sized result to user space.
 * NOTE(review): extract is line-sampled — the `value` declaration,
 * break statements, and closing braces are not visible here.
 */
2924 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2927 drm_radeon_private_t *dev_priv = dev->dev_private;
2928 drm_radeon_getparam_t param;
2932 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2933 return DRM_ERR(EINVAL);
2936 DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2939 DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2941 switch (param.param) {
2942 case RADEON_PARAM_GART_BUFFER_OFFSET:
2943 value = dev_priv->gart_buffers_offset;
2945 case RADEON_PARAM_LAST_FRAME:
/* Reads are counted for the usage statistics. */
2946 dev_priv->stats.last_frame_reads++;
2947 value = GET_SCRATCH(0);
2949 case RADEON_PARAM_LAST_DISPATCH:
2950 value = GET_SCRATCH(1);
2952 case RADEON_PARAM_LAST_CLEAR:
2953 dev_priv->stats.last_clear_reads++;
2954 value = GET_SCRATCH(2);
2956 case RADEON_PARAM_IRQ_NR:
2959 case RADEON_PARAM_GART_BASE:
2960 value = dev_priv->gart_vm_start;
2962 case RADEON_PARAM_REGISTER_HANDLE:
2963 value = dev_priv->mmio->offset;
2965 case RADEON_PARAM_STATUS_HANDLE:
2966 value = dev_priv->ring_rptr_offset;
/* SAREA_HANDLE only exists on 32-bit builds; see rationale below. */
2968 #if BITS_PER_LONG == 32
2970 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2971 * pointer which can't fit into an int-sized variable. According to
2972 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2973 * not supporting it shouldn't be a problem. If the same functionality
2974 * is needed on 64-bit platforms, a new ioctl() would have to be added,
2975 * so backwards-compatibility for the embedded platforms can be
2976 * maintained. --davidm 4-Feb-2004.
2978 case RADEON_PARAM_SAREA_HANDLE:
2979 /* The lock is the first dword in the sarea. */
2980 value = (long)dev->lock.hw_lock;
2983 case RADEON_PARAM_GART_TEX_HANDLE:
2984 value = dev_priv->gart_textures_offset;
2987 case RADEON_PARAM_CARD_TYPE:
2988 if (dev_priv->flags & CHIP_IS_PCIE)
2989 value = RADEON_CARD_PCIE;
2990 else if (dev_priv->flags & CHIP_IS_AGP)
2991 value = RADEON_CARD_AGP;
2993 value = RADEON_CARD_PCI;
/* Unknown parameter id. */
2996 return DRM_ERR(EINVAL);
2999 if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3000 DRM_ERROR("copy_to_user\n");
3001 return DRM_ERR(EFAULT);
/* Ioctl: set a per-client or driver-wide parameter.
 * Handles framebuffer-location delta, color-tiling toggle, PCI GART
 * location, and the new-memmap flag; unknown params are rejected.
 * NOTE(review): extract is line-sampled — the switch line, break
 * statements, and closing braces are not visible here.
 */
3007 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3010 drm_radeon_private_t *dev_priv = dev->dev_private;
3011 drm_file_t *filp_priv;
3012 drm_radeon_setparam_t sp;
3013 struct drm_radeon_driver_file_fields *radeon_priv;
3016 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3017 return DRM_ERR(EINVAL);
3020 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3022 DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3026 case RADEON_SETPARAM_FB_LOCATION:
/* Per-client delta between the kernel's and the client's idea of
 * the framebuffer base; used by the offset fixup code. */
3027 radeon_priv = filp_priv->driver_priv;
3028 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3030 case RADEON_SETPARAM_SWITCH_TILING:
/* Only 0 and 1 are acted upon; other values are silently ignored. */
3031 if (sp.value == 0) {
3032 DRM_DEBUG("color tiling disabled\n");
3033 dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3034 dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3035 dev_priv->sarea_priv->tiling_enabled = 0;
3036 } else if (sp.value == 1) {
3037 DRM_DEBUG("color tiling enabled\n");
3038 dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3039 dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3040 dev_priv->sarea_priv->tiling_enabled = 1;
3043 case RADEON_SETPARAM_PCIGART_LOCATION:
3044 dev_priv->pcigart_offset = sp.value;
3046 case RADEON_SETPARAM_NEW_MEMMAP:
3047 dev_priv->new_memmap = sp.value;
3050 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3051 return DRM_ERR(EINVAL);
3057 /* When a client dies:
3058 * - Check for and clean up flipped page state
3059 * - Free any alloced GART memory.
3060 * - Free any alloced radeon surfaces.
3062 * DRM infrastructure takes care of reclaiming dma buffers.
3064 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
/* dev_private may be NULL if the device was never initialized. */
3066 if (dev->dev_private) {
3067 drm_radeon_private_t *dev_priv = dev->dev_private;
3068 if (dev_priv->page_flipping) {
3069 radeon_do_cleanup_pageflip(dev);
/* Release this client's GART and framebuffer heap allocations,
 * then its tiled-surface registrations. */
3071 radeon_mem_release(filp, dev_priv->gart_heap);
3072 radeon_mem_release(filp, dev_priv->fb_heap);
3073 radeon_surfaces_release(filp, dev_priv);
/* Called when the last client closes the device: tear everything
 * down via radeon_do_release(). */
3077 void radeon_driver_lastclose(drm_device_t * dev)
3079 radeon_do_release(dev);
/* Per-client open hook: allocate the driver-private file fields and
 * initialize radeon_fb_delta (fb_location when the device is up,
 * otherwise 0).
 * NOTE(review): extract is line-sampled — the allocation-failure
 * check, the if/else lines, and the return are not visible here.
 */
3082 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3084 drm_radeon_private_t *dev_priv = dev->dev_private;
3085 struct drm_radeon_driver_file_fields *radeon_priv;
3089 (struct drm_radeon_driver_file_fields *)
3090 drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3095 filp_priv->driver_priv = radeon_priv;
3098 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3100 radeon_priv->radeon_fb_delta = 0;
/* Per-client close hook: free the file fields allocated in
 * radeon_driver_open(). */
3104 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3106 struct drm_radeon_driver_file_fields *radeon_priv =
3107 filp_priv->driver_priv;
3109 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
/* Ioctl dispatch table, indexed by ioctl number.  Flags gate access:
 * DRM_AUTH = authenticated client; DRM_MASTER|DRM_ROOT_ONLY mark the
 * privileged CP-control and unverified-submission paths. */
3112 drm_ioctl_desc_t radeon_ioctls[] = {
3113 [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3114 [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3115 [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3116 [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3117 [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3118 [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3119 [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3120 [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3121 [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3122 [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3123 [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3124 [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3125 [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3126 [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
/* INDIRECT is unverified command submission — privileged only. */
3127 [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3128 [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3129 [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3130 [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3131 [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3132 [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3133 [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3134 [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3135 [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3136 [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3137 [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3138 [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3139 [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
/* Number of entries in radeon_ioctls[], exported to the DRM core. */
3142 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);