#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
+#include "intel_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
-static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
{
struct drm_i915_gem_create *args = data;
struct drm_gem_object *obj;
- int handle, ret;
+ int ret;
+ u32 handle;
args->size = roundup(args->size, PAGE_SIZE);
return 0;
}
+static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
+{
+ drm_i915_private_t *dev_priv = obj->dev->dev_private;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+ return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+ obj_priv->tiling_mode != I915_TILING_NONE;
+}
+
static inline int
slow_shmem_copy(struct page *dst_page,
int dst_offset,
return 0;
}
+static inline int
+slow_shmem_bit17_copy(struct page *gpu_page,
+ int gpu_offset,
+ struct page *cpu_page,
+ int cpu_offset,
+ int length,
+ int is_read)
+{
+ char *gpu_vaddr, *cpu_vaddr;
+
+ /* Use the unswizzled path if this page isn't affected. */
+ if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
+ if (is_read)
+ return slow_shmem_copy(cpu_page, cpu_offset,
+ gpu_page, gpu_offset, length);
+ else
+ return slow_shmem_copy(gpu_page, gpu_offset,
+ cpu_page, cpu_offset, length);
+ }
+
+ gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
+ if (gpu_vaddr == NULL)
+ return -ENOMEM;
+
+ cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
+ if (cpu_vaddr == NULL) {
+ kunmap_atomic(gpu_vaddr, KM_USER0);
+ return -ENOMEM;
+ }
+
+ /* Copy the data, XORing A6 with A17 (1). The user already knows he's
+ * XORing with the other bits (A9 for Y, A9 and A10 for X)
+ */
+ while (length > 0) {
+ int cacheline_end = ALIGN(gpu_offset + 1, 64);
+ int this_length = min(cacheline_end - gpu_offset, length);
+ int swizzled_gpu_offset = gpu_offset ^ 64;
+
+ if (is_read) {
+ memcpy(cpu_vaddr + cpu_offset,
+ gpu_vaddr + swizzled_gpu_offset,
+ this_length);
+ } else {
+ memcpy(gpu_vaddr + swizzled_gpu_offset,
+ cpu_vaddr + cpu_offset,
+ this_length);
+ }
+ cpu_offset += this_length;
+ gpu_offset += this_length;
+ length -= this_length;
+ }
+
+ kunmap_atomic(cpu_vaddr, KM_USER1);
+ kunmap_atomic(gpu_vaddr, KM_USER0);
+
+ return 0;
+}
+
/**
* This is the fast shmem pread path, which attempts to copy_from_user directly
* from the backing pages of the object to the user's address space. On a
int page_length;
int ret;
uint64_t data_ptr = args->data_ptr;
+ int do_bit17_swizzling;
remain = args->size;
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
goto fail_put_user_pages;
}
+ do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
mutex_lock(&dev->struct_mutex);
ret = i915_gem_object_get_pages(obj);
if ((data_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - data_page_offset;
- ret = slow_shmem_copy(user_pages[data_page_index],
- data_page_offset,
- obj_priv->pages[shmem_page_index],
- shmem_page_offset,
- page_length);
+ if (do_bit17_swizzling) {
+ ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+ shmem_page_offset,
+ user_pages[data_page_index],
+ data_page_offset,
+ page_length,
+ 1);
+ } else {
+ ret = slow_shmem_copy(user_pages[data_page_index],
+ data_page_offset,
+ obj_priv->pages[shmem_page_index],
+ shmem_page_offset,
+ page_length);
+ }
if (ret)
goto fail_put_pages;
SetPageDirty(user_pages[i]);
page_cache_release(user_pages[i]);
}
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
return -EINVAL;
}
- ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
- if (ret != 0)
+ if (i915_gem_object_needs_bit17_swizzle(obj)) {
ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
+ } else {
+ ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
+ if (ret != 0)
+ ret = i915_gem_shmem_pread_slow(dev, obj, args,
+ file_priv);
+ }
drm_gem_object_unreference(obj);
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
out_unpin_pages:
for (i = 0; i < pinned_pages; i++)
page_cache_release(user_pages[i]);
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
int page_length;
int ret;
uint64_t data_ptr = args->data_ptr;
+ int do_bit17_swizzling;
remain = args->size;
last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
num_pages = last_data_page - first_data_page + 1;
- user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
if (user_pages == NULL)
return -ENOMEM;
goto fail_put_user_pages;
}
+ do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
mutex_lock(&dev->struct_mutex);
ret = i915_gem_object_get_pages(obj);
if ((data_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - data_page_offset;
- ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
- shmem_page_offset,
- user_pages[data_page_index],
- data_page_offset,
- page_length);
+ if (do_bit17_swizzling) {
+ ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+ shmem_page_offset,
+ user_pages[data_page_index],
+ data_page_offset,
+ page_length,
+ 0);
+ } else {
+ ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
+ shmem_page_offset,
+ user_pages[data_page_index],
+ data_page_offset,
+ page_length);
+ }
if (ret)
goto fail_put_pages;
fail_put_user_pages:
for (i = 0; i < pinned_pages; i++)
page_cache_release(user_pages[i]);
- kfree(user_pages);
+ drm_free_large(user_pages);
return ret;
}
ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
file_priv);
}
+ } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
+ ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
} else {
ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
if (ret == -EFAULT) {
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_set_domain *args = data;
struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj_priv;
uint32_t read_domains = args->read_domains;
uint32_t write_domain = args->write_domain;
int ret;
return -ENODEV;
/* Only handle setting domains to types used by the CPU. */
- if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+ if (write_domain & I915_GEM_GPU_DOMAINS)
return -EINVAL;
- if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+ if (read_domains & I915_GEM_GPU_DOMAINS)
return -EINVAL;
/* Having something in the write domain implies it's in the read
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL)
return -EBADF;
+ obj_priv = obj->driver_private;
mutex_lock(&dev->struct_mutex);
+
+ intel_mark_busy(dev, obj);
+
#if WATCH_BUF
- DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
+ DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
obj, obj->size, read_domains, write_domain);
#endif
if (read_domains & I915_GEM_DOMAIN_GTT) {
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+ /* Update the LRU on the fence for the CPU access that's
+ * about to occur.
+ */
+ if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+ list_move_tail(&obj_priv->fence_list,
+ &dev_priv->mm.fence_list);
+ }
+
/* Silently promote "you're not bound, there was nothing to do"
* to success, since the client was just asking us to
* make sure everything was done.
}
#if WATCH_BUF
- DRM_INFO("%s: sw_finish %d (%p %d)\n",
+ DRM_INFO("%s: sw_finish %d (%p %zd)\n",
__func__, args->handle, obj, obj->size);
#endif
obj_priv = obj->driver_private;
mutex_unlock(&dev->struct_mutex);
return VM_FAULT_SIGBUS;
}
- list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+ list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret) {
+ mutex_unlock(&dev->struct_mutex);
+ return VM_FAULT_SIGBUS;
+ }
}
/* Need a new fence register? */
- if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
- obj_priv->tiling_mode != I915_TILING_NONE) {
- ret = i915_gem_object_get_fence_reg(obj, write);
+ if (obj_priv->tiling_mode != I915_TILING_NONE) {
+ ret = i915_gem_object_get_fence_reg(obj);
if (ret) {
mutex_unlock(&dev->struct_mutex);
return VM_FAULT_SIGBUS;
/* Set the object up for mmap'ing */
list = &obj->map_list;
- list->map = drm_calloc(1, sizeof(struct drm_map_list),
- DRM_MEM_DRIVER);
+ list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
if (!list->map)
return -ENOMEM;
out_free_mm:
drm_mm_put_block(list->file_offset_node);
out_free_list:
- drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+ kfree(list->map);
return ret;
}
+/**
+ * i915_gem_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmaping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void
+i915_gem_release_mmap(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+ if (dev->dev_mapping)
+ unmap_mapping_range(dev->dev_mapping,
+ obj_priv->mmap_offset, obj->size, 1);
+}
+
static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
}
if (list->map) {
- drm_free(list->map, sizeof(struct drm_map), DRM_MEM_DRIVER);
+ kfree(list->map);
list->map = NULL;
}
mutex_unlock(&dev->struct_mutex);
return ret;
}
- list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+ list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
}
drm_gem_object_unreference(obj);
if (--obj_priv->pages_refcount != 0)
return;
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ i915_gem_object_save_bit_17_swizzle(obj);
+
for (i = 0; i < page_count; i++)
if (obj_priv->pages[i] != NULL) {
if (obj_priv->dirty)
}
obj_priv->dirty = 0;
- drm_free(obj_priv->pages,
- page_count * sizeof(struct page *),
- DRM_MEM_DRIVER);
+ drm_free_large(obj_priv->pages);
obj_priv->pages = NULL;
}
* Returned sequence numbers are nonzero on success.
*/
static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ struct drm_i915_file_private *i915_file_priv = NULL;
struct drm_i915_gem_request *request;
uint32_t seqno;
int was_empty;
RING_LOCALS;
- request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
+ if (file_priv != NULL)
+ i915_file_priv = file_priv->driver_priv;
+
+ request = kzalloc(sizeof(*request), GFP_KERNEL);
if (request == NULL)
return 0;
request->emitted_jiffies = jiffies;
was_empty = list_empty(&dev_priv->mm.request_list);
list_add_tail(&request->list, &dev_priv->mm.request_list);
+ if (i915_file_priv) {
+ list_add_tail(&request->client_list,
+ &i915_file_priv->mm.request_list);
+ } else {
+ INIT_LIST_HEAD(&request->client_list);
+ }
/* Associate any objects on the flushing list matching the write
* domain we're flushing with our flush.
}
- if (was_empty && !dev_priv->mm.suspended)
- schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
+ if (!dev_priv->mm.suspended) {
+ mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+ if (was_empty)
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+ }
return seqno;
}
if (obj->write_domain != 0)
i915_gem_object_move_to_flushing(obj);
- else
+ else {
+ /* Take a reference on the object so it won't be
+ * freed while the spinlock is held. The list
+ * protection for this spinlock is safe when breaking
+ * the lock like this since the next thing we do
+ * is just get the head of the list again.
+ */
+ drm_gem_object_reference(obj);
i915_gem_object_move_to_inactive(obj);
+ spin_unlock(&dev_priv->mm.active_list_lock);
+ drm_gem_object_unreference(obj);
+ spin_lock(&dev_priv->mm.active_list_lock);
+ }
}
out:
spin_unlock(&dev_priv->mm.active_list_lock);
/**
* Returns true if seq1 is later than seq2.
*/
-static int
+bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
return (int32_t)(seq1 - seq2) >= 0;
retiring_seqno = request->seqno;
if (i915_seqno_passed(seqno, retiring_seqno) ||
- dev_priv->mm.wedged) {
+ atomic_read(&dev_priv->mm.wedged)) {
i915_gem_retire_request(dev, request);
list_del(&request->list);
- drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
+ list_del(&request->client_list);
+ kfree(request);
} else
break;
}
i915_gem_retire_requests(dev);
if (!dev_priv->mm.suspended &&
!list_empty(&dev_priv->mm.request_list))
- schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
+ queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
mutex_unlock(&dev->struct_mutex);
}
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
drm_i915_private_t *dev_priv = dev->dev_private;
+ u32 ier;
int ret = 0;
BUG_ON(seqno == 0);
+ if (atomic_read(&dev_priv->mm.wedged))
+ return -EIO;
+
if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
+ if (IS_IGDNG(dev))
+ ier = I915_READ(DEIER) | I915_READ(GTIER);
+ else
+ ier = I915_READ(IER);
+ if (!ier) {
+ DRM_ERROR("something (likely vbetool) disabled "
+ "interrupts, re-enabling\n");
+ i915_driver_irq_preinstall(dev);
+ i915_driver_irq_postinstall(dev);
+ }
+
dev_priv->mm.waiting_gem_seqno = seqno;
i915_user_irq_get(dev);
ret = wait_event_interruptible(dev_priv->irq_queue,
i915_seqno_passed(i915_get_gem_seqno(dev),
seqno) ||
- dev_priv->mm.wedged);
+ atomic_read(&dev_priv->mm.wedged));
i915_user_irq_put(dev);
dev_priv->mm.waiting_gem_seqno = 0;
}
- if (dev_priv->mm.wedged)
+ if (atomic_read(&dev_priv->mm.wedged))
ret = -EIO;
if (ret && ret != -ERESTARTSYS)
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
- if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) {
+ if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
/*
* read/write caches:
*
{
struct drm_device *dev = obj->dev;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
- loff_t offset;
int ret = 0;
#if WATCH_BUF
return -EINVAL;
}
+ /* blow away mappings if mapped through GTT */
+ i915_gem_release_mmap(obj);
+
+ if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+ i915_gem_clear_fence_reg(obj);
+
/* Move the object to the CPU domain to ensure that
* any possible CPU writes while it's not in the GTT
* are flushed when we go to remap it. This will
return ret;
}
+ BUG_ON(obj_priv->active);
+
if (obj_priv->agp_mem != NULL) {
drm_unbind_agp(obj_priv->agp_mem);
drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
obj_priv->agp_mem = NULL;
}
- BUG_ON(obj_priv->active);
-
- /* blow away mappings if mapped through GTT */
- offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
- if (dev->dev_mapping)
- unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
-
- if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
- i915_gem_clear_fence_reg(obj);
-
i915_gem_object_put_pages(obj);
if (obj_priv->gtt_space) {
i915_gem_flush(dev,
obj->write_domain,
obj->write_domain);
- i915_add_request(dev, obj->write_domain);
+ i915_add_request(dev, NULL, obj->write_domain);
obj = NULL;
continue;
/* If we didn't do any of the above, there's nothing to be done
* and we just can't fit it in.
*/
- return -ENOMEM;
+ return -ENOSPC;
}
return ret;
}
if (ret != 0)
break;
}
- if (ret == -ENOMEM)
+ if (ret == -ENOSPC)
return 0;
return ret;
}
*/
page_count = obj->size / PAGE_SIZE;
BUG_ON(obj_priv->pages != NULL);
- obj_priv->pages = drm_calloc(page_count, sizeof(struct page *),
- DRM_MEM_DRIVER);
+ obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
if (obj_priv->pages == NULL) {
DRM_ERROR("Faled to allocate page list\n");
obj_priv->pages_refcount--;
}
obj_priv->pages[i] = page;
}
+
+ if (obj_priv->tiling_mode != I915_TILING_NONE)
+ i915_gem_object_do_bit_17_swizzle(obj);
+
return 0;
}
return;
}
- pitch_val = (obj_priv->stride / 128) - 1;
- WARN_ON(pitch_val & ~0x0000000f);
+ pitch_val = obj_priv->stride / 128;
+ pitch_val = ffs(pitch_val) - 1;
+ WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
+
val = obj_priv->gtt_offset;
if (obj_priv->tiling_mode == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I830_FENCE_REG_VALID;
I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
-
}
/**
* i915_gem_object_get_fence_reg - set up a fence reg for an object
* @obj: object to map through a fence reg
- * @write: object is about to be written
*
* When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled.
* It then sets up the reg based on the object's properties: address, pitch
* and tiling format.
*/
-static int
-i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
+int
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *old_obj_priv = NULL;
int i, ret, avail;
+ /* Just update our place in the LRU if our fence is getting used. */
+ if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+ list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+ return 0;
+ }
+
switch (obj_priv->tiling_mode) {
case I915_TILING_NONE:
WARN(1, "allocating a fence for non-tiled object?\n");
}
/* First try to find a free reg */
-try_again:
avail = 0;
for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
reg = &dev_priv->fence_regs[i];
/* None available, try to steal one or wait for a user to finish */
if (i == dev_priv->num_fence_regs) {
- uint32_t seqno = dev_priv->mm.next_gem_seqno;
- loff_t offset;
+ struct drm_gem_object *old_obj = NULL;
if (avail == 0)
- return -ENOMEM;
-
- for (i = dev_priv->fence_reg_start;
- i < dev_priv->num_fence_regs; i++) {
- uint32_t this_seqno;
+ return -ENOSPC;
- reg = &dev_priv->fence_regs[i];
- old_obj_priv = reg->obj->driver_private;
+ list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
+ fence_list) {
+ old_obj = old_obj_priv->obj;
if (old_obj_priv->pin_count)
continue;
+ /* Take a reference, as otherwise the wait_rendering
+ * below may cause the object to get freed out from
+ * under us.
+ */
+ drm_gem_object_reference(old_obj);
+
/* i915 uses fences for GPU access to tiled buffers */
if (IS_I965G(dev) || !old_obj_priv->active)
break;
- /* find the seqno of the first available fence */
- this_seqno = old_obj_priv->last_rendering_seqno;
- if (this_seqno != 0 &&
- reg->obj->write_domain == 0 &&
- i915_seqno_passed(seqno, this_seqno))
- seqno = this_seqno;
- }
-
- /*
- * Now things get ugly... we have to wait for one of the
- * objects to finish before trying again.
- */
- if (i == dev_priv->num_fence_regs) {
- if (seqno == dev_priv->mm.next_gem_seqno) {
- i915_gem_flush(dev,
- I915_GEM_GPU_DOMAINS,
- I915_GEM_GPU_DOMAINS);
- seqno = i915_add_request(dev,
- I915_GEM_GPU_DOMAINS);
- if (seqno == 0)
- return -ENOMEM;
+ /* This brings the object to the head of the LRU if it
+ * had been written to. The only way this should
+ * result in us waiting longer than the expected
+ * optimal amount of time is if there was a
+ * fence-using buffer later that was read-only.
+ */
+ i915_gem_object_flush_gpu_write_domain(old_obj);
+ ret = i915_gem_object_wait_rendering(old_obj);
+ if (ret != 0) {
+ drm_gem_object_unreference(old_obj);
+ return ret;
}
- ret = i915_wait_request(dev, seqno);
- if (ret)
- return ret;
- goto try_again;
+ break;
}
- BUG_ON(old_obj_priv->active ||
- (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
/*
* Zap this virtual mapping so we can set up a fence again
* for this object next time we need it.
*/
- offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
- if (dev->dev_mapping)
- unmap_mapping_range(dev->dev_mapping, offset,
- reg->obj->size, 1);
+ i915_gem_release_mmap(old_obj);
+
+ i = old_obj_priv->fence_reg;
+ reg = &dev_priv->fence_regs[i];
+
old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+ list_del_init(&old_obj_priv->fence_list);
+
+ drm_gem_object_unreference(old_obj);
}
obj_priv->fence_reg = i;
+ list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
+
reg->obj = obj;
if (IS_I965G(dev))
dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
obj_priv->fence_reg = I915_FENCE_REG_NONE;
+ list_del_init(&obj_priv->fence_list);
+}
+
+/**
+ * i915_gem_object_put_fence_reg - waits on outstanding fenced access
+ * to the buffer to finish, and then resets the fence register.
+ * @obj: tiled object holding a fence register.
+ *
+ * Zeroes out the fence register itself and clears out the associated
+ * data structures in dev_priv and obj_priv.
+ */
+int
+i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+ if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
+ return 0;
+
+ /* On the i915, GPU access to tiled buffers is via a fence,
+ * therefore we must wait for any outstanding access to complete
+ * before clearing the fence.
+ */
+ if (!IS_I965G(dev)) {
+ int ret;
+
+ i915_gem_object_flush_gpu_write_domain(obj);
+ i915_gem_object_flush_gtt_write_domain(obj);
+ ret = i915_gem_object_wait_rendering(obj);
+ if (ret != 0)
+ return ret;
+ }
+
+ i915_gem_clear_fence_reg (obj);
+
+ return 0;
}
/**
spin_unlock(&dev_priv->mm.active_list_lock);
if (lists_empty) {
DRM_ERROR("GTT full, but LRU list empty\n");
- return -ENOMEM;
+ return -ENOSPC;
}
ret = i915_gem_evict_something(dev);
}
#if WATCH_BUF
- DRM_INFO("Binding object of size %d at 0x%08x\n",
+ DRM_INFO("Binding object of size %zd at 0x%08x\n",
obj->size, obj_priv->gtt_offset);
#endif
ret = i915_gem_object_get_pages(obj);
* wasn't in the GTT, there shouldn't be any way it could have been in
* a GPU cache
*/
- BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
- BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+ BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+ BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
return 0;
}
/* Queue the GPU write cache flushing we need. */
i915_gem_flush(dev, 0, obj->write_domain);
- seqno = i915_add_request(dev, obj->write_domain);
+ seqno = i915_add_request(dev, NULL, obj->write_domain);
obj->write_domain = 0;
i915_gem_object_move_to_active(obj, seqno);
}
BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
+ intel_mark_busy(dev, obj);
+
#if WATCH_BUF
DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
__func__, obj,
/* Free the page_cpu_valid mappings which are now stale, whether
* or not we've got I915_GEM_DOMAIN_CPU.
*/
- drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
- DRM_MEM_DRIVER);
+ kfree(obj_priv->page_cpu_valid);
obj_priv->page_cpu_valid = NULL;
}
* newly adding I915_GEM_DOMAIN_CPU
*/
if (obj_priv->page_cpu_valid == NULL) {
- obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
- DRM_MEM_DRIVER);
+ obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
+ GFP_KERNEL);
if (obj_priv->page_cpu_valid == NULL)
return -ENOMEM;
} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
drm_i915_private_t *dev_priv = dev->dev_private;
int nbox = exec->num_cliprects;
int i = 0, count;
- uint32_t exec_start, exec_len;
+ uint32_t exec_start, exec_len;
RING_LOCALS;
exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
exec_len = (uint32_t) exec->batch_len;
- if ((exec_start | exec_len) & 0x7) {
- DRM_ERROR("alignment\n");
- return -EINVAL;
- }
-
- if (!exec_start)
- return -EINVAL;
-
count = nbox ? nbox : 1;
for (i = 0; i < count; i++) {
/* Throttle our rendering by waiting until the ring has completed our requests
* emitted over 20 msec ago.
*
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
* This should get us reasonable parallelism between CPU and GPU but also
* relatively low latency when blocking on a particular request to finish.
*/
{
struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
int ret = 0;
- uint32_t seqno;
+ unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
mutex_lock(&dev->struct_mutex);
- seqno = i915_file_priv->mm.last_gem_throttle_seqno;
- i915_file_priv->mm.last_gem_throttle_seqno =
- i915_file_priv->mm.last_gem_seqno;
- if (seqno)
- ret = i915_wait_request(dev, seqno);
+ while (!list_empty(&i915_file_priv->mm.request_list)) {
+ struct drm_i915_gem_request *request;
+
+ request = list_first_entry(&i915_file_priv->mm.request_list,
+ struct drm_i915_gem_request,
+ client_list);
+
+ if (time_after_eq(request->emitted_jiffies, recent_enough))
+ break;
+
+ ret = i915_wait_request(dev, request->seqno);
+ if (ret != 0)
+ break;
+ }
mutex_unlock(&dev->struct_mutex);
+
return ret;
}
reloc_count += exec_list[i].relocation_count;
}
- *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER);
+ *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
if (*relocs == NULL)
return -ENOMEM;
exec_list[i].relocation_count *
sizeof(**relocs));
if (ret != 0) {
- drm_free(*relocs, reloc_count * sizeof(**relocs),
- DRM_MEM_DRIVER);
+ drm_free_large(*relocs);
*relocs = NULL;
return -EFAULT;
}
}
err:
- drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
+ drm_free_large(relocs);
return ret;
}
+static int
+i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
+ uint64_t exec_offset)
+{
+ uint32_t exec_start, exec_len;
+
+ exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+ exec_len = (uint32_t) exec->batch_len;
+
+ if ((exec_start | exec_len) & 0x7)
+ return -EINVAL;
+
+ if (!exec_start)
+ return -EINVAL;
+
+ return 0;
+}
+
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
struct drm_i915_gem_execbuffer *args = data;
struct drm_i915_gem_exec_object *exec_list = NULL;
struct drm_gem_object **object_list = NULL;
return -EINVAL;
}
/* Copy in the exec list from userland */
- exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
- DRM_MEM_DRIVER);
- object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
- DRM_MEM_DRIVER);
+ exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
+ object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
if (exec_list == NULL || object_list == NULL) {
DRM_ERROR("Failed to allocate exec or object list "
"for %d buffers\n",
}
if (args->num_cliprects != 0) {
- cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects),
- DRM_MEM_DRIVER);
+ cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
+ GFP_KERNEL);
if (cliprects == NULL)
goto pre_mutex_err;
i915_verify_inactive(dev, __FILE__, __LINE__);
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Execbuf while wedged\n");
mutex_unlock(&dev->struct_mutex);
ret = -EIO;
break;
/* error other than GTT full, or we've already tried again */
- if (ret != -ENOMEM || pin_tries >= 1) {
+ if (ret != -ENOSPC || pin_tries >= 1) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to pin buffers %d\n", ret);
goto err;
/* Set the pending read domains for the batch buffer to COMMAND */
batch_obj = object_list[args->buffer_count-1];
- batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
- batch_obj->pending_write_domain = 0;
+ if (batch_obj->pending_write_domain) {
+ DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+
+ /* Sanity check the batch buffer, prior to moving objects */
+ exec_offset = exec_list[args->buffer_count - 1].offset;
+ ret = i915_gem_check_execbuffer (args, exec_offset);
+ if (ret != 0) {
+ DRM_ERROR("execbuf with invalid offset/length\n");
+ goto err;
+ }
i915_verify_inactive(dev, __FILE__, __LINE__);
dev->invalidate_domains,
dev->flush_domains);
if (dev->flush_domains)
- (void)i915_add_request(dev, dev->flush_domains);
+ (void)i915_add_request(dev, file_priv,
+ dev->flush_domains);
}
for (i = 0; i < args->buffer_count; i++) {
}
#endif
- exec_offset = exec_list[args->buffer_count - 1].offset;
-
#if WATCH_EXEC
i915_gem_dump_object(batch_obj,
args->batch_len,
* *some* interrupts representing completion of buffers that we can
* wait on when trying to clear up gtt space).
*/
- seqno = i915_add_request(dev, flush_domains);
+ seqno = i915_add_request(dev, file_priv, flush_domains);
BUG_ON(seqno == 0);
- i915_file_priv->mm.last_gem_seqno = seqno;
for (i = 0; i < args->buffer_count; i++) {
struct drm_gem_object *obj = object_list[i];
}
pre_mutex_err:
- drm_free(object_list, sizeof(*object_list) * args->buffer_count,
- DRM_MEM_DRIVER);
- drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
- DRM_MEM_DRIVER);
- drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects,
- DRM_MEM_DRIVER);
+ drm_free_large(object_list);
+ drm_free_large(exec_list);
+ kfree(cliprects);
return ret;
}
* Pre-965 chips need a fence register set up in order to
* properly handle tiled surfaces.
*/
- if (!IS_I965G(dev) &&
- obj_priv->fence_reg == I915_FENCE_REG_NONE &&
- obj_priv->tiling_mode != I915_TILING_NONE) {
- ret = i915_gem_object_get_fence_reg(obj, true);
+ if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
+ ret = i915_gem_object_get_fence_reg(obj);
if (ret != 0) {
if (ret != -EBUSY && ret != -ERESTARTSYS)
DRM_ERROR("Failure to install fence: %d\n",
atomic_inc(&dev->pin_count);
atomic_add(obj->size, &dev->pin_memory);
if (!obj_priv->active &&
- (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) == 0 &&
+ (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
!list_empty(&obj_priv->list))
list_del_init(&obj_priv->list);
}
*/
if (obj_priv->pin_count == 0) {
if (!obj_priv->active &&
- (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
- I915_GEM_DOMAIN_GTT)) == 0)
+ (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
list_move_tail(&obj_priv->list,
&dev_priv->mm.inactive_list);
atomic_dec(&dev->pin_count);
struct drm_gem_object *obj;
struct drm_i915_gem_object *obj_priv;
- mutex_lock(&dev->struct_mutex);
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL) {
DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
args->handle);
- mutex_unlock(&dev->struct_mutex);
return -EBADF;
}
+ mutex_lock(&dev->struct_mutex);
/* Update the active list for the hardware's current position.
* Otherwise this only updates on a delayed timer or when irqs are
* actually unmasked, and our working set ends up being larger than
{
struct drm_i915_gem_object *obj_priv;
- obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
+ obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL);
if (obj_priv == NULL)
return -ENOMEM;
obj_priv->obj = obj;
obj_priv->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj_priv->list);
+ INIT_LIST_HEAD(&obj_priv->fence_list);
return 0;
}
i915_gem_object_unbind(obj);
- i915_gem_free_mmap_offset(obj);
+ if (obj_priv->mmap_offset)
+ i915_gem_free_mmap_offset(obj);
- drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
- drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
+ kfree(obj_priv->page_cpu_valid);
+ kfree(obj_priv->bit_17);
+ kfree(obj->driver_private);
}
/** Unbinds all objects that are on the given buffer list. */
* We need to replace this with a semaphore, or something.
*/
dev_priv->mm.suspended = 1;
+ del_timer(&dev_priv->hangcheck_timer);
/* Cancel the retire work handler, wait for it to finish if running
*/
/* Flush the GPU along with all non-CPU write domains
*/
- i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
- ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
- seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
if (seqno == 0) {
mutex_unlock(&dev->struct_mutex);
if (last_seqno == cur_seqno) {
if (stuck++ > 100) {
DRM_ERROR("hardware wedged\n");
- dev_priv->mm.wedged = 1;
+ atomic_set(&dev_priv->mm.wedged, 1);
DRM_WAKEUP(&dev_priv->irq_queue);
break;
}
i915_gem_retire_requests(dev);
spin_lock(&dev_priv->mm.active_list_lock);
- if (!dev_priv->mm.wedged) {
+ if (!atomic_read(&dev_priv->mm.wedged)) {
/* Active and flushing should now be empty as we've
* waited for a sequence higher than any pending execbuffer
*/
/* Set up the kernel mapping for the ring. */
ring->Size = obj->size;
- ring->tail_mask = obj->size - 1;
ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
ring->map.size = obj->size;
if (drm_core_check_feature(dev, DRIVER_MODESET))
return 0;
- if (dev_priv->mm.wedged) {
+ if (atomic_read(&dev_priv->mm.wedged)) {
DRM_ERROR("Reenabling wedged hardware, good luck\n");
- dev_priv->mm.wedged = 0;
+ atomic_set(&dev_priv->mm.wedged, 0);
}
mutex_lock(&dev->struct_mutex);
dev_priv->mm.suspended = 0;
ret = i915_gem_init_ringbuffer(dev);
- if (ret != 0)
+ if (ret != 0) {
+ mutex_unlock(&dev->struct_mutex);
return ret;
+ }
spin_lock(&dev_priv->mm.active_list_lock);
BUG_ON(!list_empty(&dev_priv->mm.active_list));
void
i915_gem_load(struct drm_device *dev)
{
+ int i;
drm_i915_private_t *dev_priv = dev->dev_private;
spin_lock_init(&dev_priv->mm.active_list_lock);
INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
INIT_LIST_HEAD(&dev_priv->mm.request_list);
+ INIT_LIST_HEAD(&dev_priv->mm.fence_list);
INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
i915_gem_retire_work_handler);
dev_priv->mm.next_gem_seqno = 1;
else
dev_priv->num_fence_regs = 8;
+ /* Initialize fence registers to zero */
+ if (IS_I965G(dev)) {
+ for (i = 0; i < 16; i++)
+ I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
+ } else {
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
+ if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+ for (i = 0; i < 8; i++)
+ I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
+ }
+
i915_gem_detect_bit_6_swizzle(dev);
}
if (dev_priv->mm.phys_objs[id - 1] || !size)
return 0;
- phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
+ phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
if (!phys_obj)
return -ENOMEM;
return 0;
kfree_obj:
- drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
+ kfree(phys_obj);
return ret;
}
}
drm_clflush_pages(obj_priv->pages, page_count);
drm_agp_chipset_flush(dev);
+
+ i915_gem_object_put_pages(obj);
out:
obj_priv->phys_obj->cur_obj = NULL;
obj_priv->phys_obj = NULL;
kunmap_atomic(src, KM_USER0);
}
+ i915_gem_object_put_pages(obj);
+
return 0;
out:
return ret;
drm_agp_chipset_flush(dev);
return 0;
}
+
+void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+{
+ struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+ /* Clean up our request list when the client is going away, so that
+ * later retire_requests won't dereference our soon-to-be-gone
+ * file_priv.
+ */
+ mutex_lock(&dev->struct_mutex);
+ while (!list_empty(&i915_file_priv->mm.request_list))
+ list_del_init(i915_file_priv->mm.request_list.next);
+ mutex_unlock(&dev->struct_mutex);
+}