drm/i915: add fence register management to execbuf
author Jesse Barnes <jbarnes@virtuousgeek.org>
Tue, 27 Jan 2009 01:10:45 +0000 (17:10 -0800)
committer Dave Airlie <airlied@linux.ie>
Sun, 8 Feb 2009 11:38:02 +0000 (21:38 +1000)
Adds code to set up fence registers at execbuf time on pre-965 chips as
necessary.  Also fixes up a few bugs in the pre-965 tile register support
(get_order != ffs).  The number of fences available to the kernel defaults
to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter
allows userspace to override that as needed.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@linux.ie>
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_tiling.c
drivers/gpu/drm/i915/i915_reg.h
include/drm/i915_drm.h

index 1e01e78..cc0adb4 100644 (file)
@@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_GEM:
                value = dev_priv->has_gem;
                break;
+       case I915_PARAM_NUM_FENCES_AVAIL:
+               value = dev_priv->num_fence_regs - dev_priv->fence_reg_start;
+               break;
        default:
                DRM_ERROR("Unknown parameter %d\n", param->param);
                return -EINVAL;
@@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data,
        case I915_SETPARAM_ALLOW_BATCHBUFFER:
                dev_priv->allow_batchbuffer = param->value;
                break;
+       case I915_SETPARAM_NUM_USED_FENCES:
+               if (param->value > dev_priv->num_fence_regs ||
+                   param->value < 0)
+                       return -EINVAL;
+               /* Userspace can use first N regs */
+               dev_priv->fence_reg_start = param->value;
+               break;
        default:
                DRM_ERROR("unknown parameter %d\n", param->param);
                return -EINVAL;
index f471d21..a70bf77 100644 (file)
@@ -602,6 +602,7 @@ int i915_gem_init_object(struct drm_gem_object *obj);
 void i915_gem_free_object(struct drm_gem_object *obj);
 int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment);
 void i915_gem_object_unpin(struct drm_gem_object *obj);
+int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
@@ -785,6 +786,11 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
                        IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev))
 
 #define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev))
+/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
+ * rows, which changed the alignment requirements and fence programming.
+ */
+#define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \
+                                                     IS_I915GM(dev)))
 #define SUPPORTS_INTEGRATED_HDMI(dev)  (IS_G4X(dev))
 
 #define PRIMARY_RINGBUFFER_SIZE         (128*1024)
index e1f831f..6a9e3a8 100644 (file)
@@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
                                           unsigned alignment);
-static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_evict_something(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
@@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        pgoff_t page_offset;
        unsigned long pfn;
        int ret = 0;
+       bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
        /* We don't use vmf->pgoff since that has the fake offset */
        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
@@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* Need a new fence register? */
        if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
            obj_priv->tiling_mode != I915_TILING_NONE) {
-               ret = i915_gem_object_get_fence_reg(obj);
+               ret = i915_gem_object_get_fence_reg(obj, write);
                if (ret != 0)
                        return VM_FAULT_SIGBUS;
        }
@@ -1214,7 +1215,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
 /**
  * Unbinds an object from the GTT aperture.
  */
-static int
+int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
@@ -1448,21 +1449,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
        int regnum = obj_priv->fence_reg;
+       int tile_width;
        uint32_t val;
        uint32_t pitch_val;
 
        if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
            (obj_priv->gtt_offset & (obj->size - 1))) {
-               WARN(1, "%s: object not 1M or size aligned\n", __func__);
+               WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
+                    __func__, obj_priv->gtt_offset, obj->size);
                return;
        }
 
-       if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) ||
-                                                      IS_I945GM(dev) ||
-                                                      IS_G33(dev)))
-               pitch_val = (obj_priv->stride / 128) - 1;
+       if (obj_priv->tiling_mode == I915_TILING_Y &&
+           HAS_128_BYTE_Y_TILING(dev))
+               tile_width = 128;
        else
-               pitch_val = (obj_priv->stride / 512) - 1;
+               tile_width = 512;
+
+       /* Note: pitch better be a power of two tile widths */
+       pitch_val = obj_priv->stride / tile_width;
+       pitch_val = ffs(pitch_val) - 1;
 
        val = obj_priv->gtt_offset;
        if (obj_priv->tiling_mode == I915_TILING_Y)
@@ -1486,7 +1492,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 
        if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
            (obj_priv->gtt_offset & (obj->size - 1))) {
-               WARN(1, "%s: object not 1M or size aligned\n", __func__);
+               WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
+                    __func__, obj_priv->gtt_offset);
                return;
        }
 
@@ -1506,6 +1513,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
+ * @write: object is about to be written
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -1517,7 +1525,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
  * and tiling format.
  */
 static int
-i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
 {
        struct drm_device *dev = obj->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1530,12 +1538,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
                WARN(1, "allocating a fence for non-tiled object?\n");
                break;
        case I915_TILING_X:
-               WARN(obj_priv->stride & (512 - 1),
-                    "object is X tiled but has non-512B pitch\n");
+               if (!obj_priv->stride)
+                       return -EINVAL;
+               WARN((obj_priv->stride & (512 - 1)),
+                    "object 0x%08x is X tiled but has non-512B pitch\n",
+                    obj_priv->gtt_offset);
                break;
        case I915_TILING_Y:
-               WARN(obj_priv->stride & (128 - 1),
-                    "object is Y tiled but has non-128B pitch\n");
+               if (!obj_priv->stride)
+                       return -EINVAL;
+               WARN((obj_priv->stride & (128 - 1)),
+                    "object 0x%08x is Y tiled but has non-128B pitch\n",
+                    obj_priv->gtt_offset);
                break;
        }
 
@@ -1637,7 +1651,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        if (dev_priv->mm.suspended)
                return -EBUSY;
        if (alignment == 0)
-               alignment = PAGE_SIZE;
+               alignment = i915_gem_get_gtt_alignment(obj);
        if (alignment & (PAGE_SIZE - 1)) {
                DRM_ERROR("Invalid object alignment requested %u\n", alignment);
                return -EINVAL;
@@ -2658,6 +2672,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
                                DRM_ERROR("Failure to bind: %d", ret);
                        return ret;
                }
+               /*
+                * Pre-965 chips need a fence register set up in order to
+                * properly handle tiled surfaces.
+                */
+               if (!IS_I965G(dev) &&
+                   obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+                   obj_priv->tiling_mode != I915_TILING_NONE)
+                       i915_gem_object_get_fence_reg(obj, true);
        }
        obj_priv->pin_count++;
 
@@ -3297,7 +3319,7 @@ i915_gem_load(struct drm_device *dev)
        /* Old X drivers will take 0-2 for front, back, depth buffers */
        dev_priv->fence_reg_start = 3;
 
-       if (IS_I965G(dev))
+       if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
                dev_priv->num_fence_regs = 16;
        else
                dev_priv->num_fence_regs = 8;
index 241f39b..2534c79 100644 (file)
@@ -173,6 +173,73 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
        dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 }
 
+
+/**
+ * Returns the size of the fence for a tiled object of the given size.
+ */
+static int
+i915_get_fence_size(struct drm_device *dev, int size)
+{
+       int i;
+       int start;
+
+       if (IS_I965G(dev)) {
+               /* The 965 can have fences at any page boundary. */
+               return ALIGN(size, 4096);
+       } else {
+               /* Align the size to a power of two greater than the smallest
+                * fence size.
+                */
+               if (IS_I9XX(dev))
+                       start = 1024 * 1024;
+               else
+                       start = 512 * 1024;
+
+               for (i = start; i < size; i <<= 1)
+                       ;
+
+               return i;
+       }
+}
+
+/* Check pitch constraints for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
+{
+       int tile_width;
+
+       /* Linear is always fine */
+       if (tiling_mode == I915_TILING_NONE)
+               return true;
+
+       if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
+               tile_width = 128;
+       else
+               tile_width = 512;
+
+       /* 965+ just needs multiples of tile width */
+       if (IS_I965G(dev)) {
+               if (stride & (tile_width - 1))
+                       return false;
+               return true;
+       }
+
+       /* Pre-965 needs power of two tile widths */
+       if (stride < tile_width)
+               return false;
+
+       if (stride & (stride - 1))
+               return false;
+
+       /* We don't handle the aperture area covered by the fence being bigger
+        * than the object size.
+        */
+       if (i915_get_fence_size(dev, size) != size)
+               return false;
+
+       return true;
+}
+
 /**
  * Sets the tiling mode of an object, returning the required swizzling of
  * bit 6 of addresses in the object.
@@ -191,6 +258,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                return -EINVAL;
        obj_priv = obj->driver_private;
 
+       if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode))
+               return -EINVAL;
+
        mutex_lock(&dev->struct_mutex);
 
        if (args->tiling_mode == I915_TILING_NONE) {
@@ -207,7 +277,23 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                        args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
                }
        }
-       obj_priv->tiling_mode = args->tiling_mode;
+       if (args->tiling_mode != obj_priv->tiling_mode) {
+               int ret;
+
+               /* Unbind the object, as switching tiling means we're
+                * switching the cache organization due to fencing, probably.
+                */
+               ret = i915_gem_object_unbind(obj);
+               if (ret != 0) {
+                       WARN(ret != -ERESTARTSYS,
+                            "failed to unbind object for tiling switch");
+                       args->tiling_mode = obj_priv->tiling_mode;
+                       mutex_unlock(&dev->struct_mutex);
+
+                       return ret;
+               }
+               obj_priv->tiling_mode = args->tiling_mode;
+       }
        obj_priv->stride = args->stride;
 
        mutex_unlock(&dev->struct_mutex);
index 2731625..928e004 100644 (file)
 #define FENCE_REG_830_0                        0x2000
 #define   I830_FENCE_START_MASK                0x07f80000
 #define   I830_FENCE_TILING_Y_SHIFT    12
-#define   I830_FENCE_SIZE_BITS(size)   ((get_order(size >> 19) - 1) << 8)
+#define   I830_FENCE_SIZE_BITS(size)   ((ffs((size) >> 19) - 1) << 8)
 #define   I830_FENCE_PITCH_SHIFT       4
 #define   I830_FENCE_REG_VALID         (1<<0)
 
 #define   I915_FENCE_START_MASK                0x0ff00000
-#define   I915_FENCE_SIZE_BITS(size)   ((get_order(size >> 20) - 1) << 8)
+#define   I915_FENCE_SIZE_BITS(size)   ((ffs((size) >> 20) - 1) << 8)
 
 #define FENCE_REG_965_0                        0x03000
 #define   I965_FENCE_PITCH_SHIFT       2
index b3bcf72..912cd52 100644 (file)
@@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_LAST_DISPATCH         3
 #define I915_PARAM_CHIPSET_ID            4
 #define I915_PARAM_HAS_GEM               5
+#define I915_PARAM_NUM_FENCES_AVAIL      6
 
 typedef struct drm_i915_getparam {
        int param;
@@ -272,6 +273,7 @@ typedef struct drm_i915_getparam {
 #define I915_SETPARAM_USE_MI_BATCHBUFFER_START            1
 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY             2
 #define I915_SETPARAM_ALLOW_BATCHBUFFER                   3
+#define I915_SETPARAM_NUM_USED_FENCES                     4
 
 typedef struct drm_i915_setparam {
        int param;