X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fi915_gem.c;h=cf5dc08b6fa85e7a5969bee275476ade70b3e8f7;hb=21d509e339565c82887733c02465bb7f5866c8f5;hp=9f4eceb8093d90719f4628e59af94b3df5cabefc;hpb=959b887cf42fd63cf10e28a7f26126f78aa1c0b0;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9f4eceb..cf5dc08 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,8 +43,6 @@ static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size); static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); -static int i915_gem_object_get_pages(struct drm_gem_object *obj); -static void i915_gem_object_put_pages(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); @@ -143,15 +141,27 @@ fast_shmem_read(struct page **pages, int length) { char __iomem *vaddr; - int ret; + int unwritten; vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); if (vaddr == NULL) return -ENOMEM; - ret = __copy_to_user_inatomic(data, vaddr + page_offset, length); + unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length); kunmap_atomic(vaddr, KM_USER0); - return ret; + if (unwritten) + return -EFAULT; + + return 0; +} + +static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) +{ + drm_i915_private_t *dev_priv = obj->dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + obj_priv->tiling_mode != I915_TILING_NONE; } static inline int @@ -181,6 +191,64 @@ slow_shmem_copy(struct page *dst_page, return 0; } +static inline int +slow_shmem_bit17_copy(struct page *gpu_page, + int gpu_offset, + struct page *cpu_page, + int cpu_offset, + int length, + int is_read) +{ + char *gpu_vaddr, *cpu_vaddr; + + /* Use the unswizzled path if this page isn't affected. */ + if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { + if (is_read) + return slow_shmem_copy(cpu_page, cpu_offset, + gpu_page, gpu_offset, length); + else + return slow_shmem_copy(gpu_page, gpu_offset, + cpu_page, cpu_offset, length); + } + + gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); + if (gpu_vaddr == NULL) + return -ENOMEM; + + cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); + if (cpu_vaddr == NULL) { + kunmap_atomic(gpu_vaddr, KM_USER0); + return -ENOMEM; + } + + /* Copy the data, XORing A6 with A17 (1). The user already knows he's + * XORing with the other bits (A9 for Y, A9 and A10 for X) + */ + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + if (is_read) { + memcpy(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + } else { + memcpy(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + } + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + kunmap_atomic(cpu_vaddr, KM_USER1); + kunmap_atomic(gpu_vaddr, KM_USER0); + + return 0; +} + /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. On a @@ -269,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -280,19 +349,21 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; down_read(&mm->mmap_sem); pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, - num_pages, 0, 0, user_pages, NULL); + num_pages, 1, 0, user_pages, NULL); up_read(&mm->mmap_sem); if (pinned_pages < num_pages) { ret = -EFAULT; goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -327,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - obj_priv->pages[shmem_page_index], - shmem_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 1); + } else { + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -349,7 +429,7 @@ fail_put_user_pages: SetPageDirty(user_pages[i]); page_cache_release(user_pages[i]); } - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -383,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); - if (ret != 0) + if (i915_gem_object_needs_bit17_swizzle(obj)) { ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, + file_priv); + } drm_gem_object_unreference(obj); @@ -564,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -634,7 +719,7 @@ out_unlock: out_unpin_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -727,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -738,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -751,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -785,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 0); + } else { + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -805,7 +902,7 @@ fail_unlock: fail_put_user_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -854,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } + } else if (i915_gem_object_needs_bit17_swizzle(obj)) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); } else { ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); if (ret == -EFAULT) { @@ -890,10 +989,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, return -ENODEV; /* Only handle setting domains to types used by the CPU. */ - if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) + if (write_domain & I915_GEM_GPU_DOMAINS) return -EINVAL; - if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) + if (read_domains & I915_GEM_GPU_DOMAINS) return -EINVAL; /* Having something in the write domain implies it's in the read @@ -1046,7 +1145,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return VM_FAULT_SIGBUS; + } + + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } /* Need a new fence register? */ @@ -1276,7 +1382,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); return ret; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } drm_gem_object_unreference(obj); @@ -1285,7 +1391,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, return 0; } -static void +void i915_gem_object_put_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -1297,6 +1403,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (--obj_priv->pages_refcount != 0) return; + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_save_bit_17_swizzle(obj); + for (i = 0; i < page_count; i++) if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) @@ -1306,9 +1415,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) } obj_priv->dirty = 0; - drm_free(obj_priv->pages, - page_count * sizeof(struct page *), - DRM_MEM_DRIVER); + drm_free_large(obj_priv->pages); obj_priv->pages = NULL; } @@ -1325,8 +1432,10 @@ i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno) obj_priv->active = 1; } /* Move from whatever list we were on to the tail of execution. */ + spin_lock(&dev_priv->mm.active_list_lock); list_move_tail(&obj_priv->list, &dev_priv->mm.active_list); + spin_unlock(&dev_priv->mm.active_list_lock); obj_priv->last_rendering_seqno = seqno; } @@ -1372,14 +1481,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj) * Returned sequence numbers are nonzero on success. */ static uint32_t -i915_add_request(struct drm_device *dev, uint32_t flush_domains) +i915_add_request(struct drm_device *dev, struct drm_file *file_priv, + uint32_t flush_domains) { drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_file_private *i915_file_priv = NULL; struct drm_i915_gem_request *request; uint32_t seqno; int was_empty; RING_LOCALS; + if (file_priv != NULL) + i915_file_priv = file_priv->driver_priv; + request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); if (request == NULL) return 0; @@ -1406,6 +1520,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains) request->emitted_jiffies = jiffies; was_empty = list_empty(&dev_priv->mm.request_list); list_add_tail(&request->list, &dev_priv->mm.request_list); + if (i915_file_priv) { + list_add_tail(&request->client_list, + &i915_file_priv->mm.request_list); + } else { + INIT_LIST_HEAD(&request->client_list); + } /* Associate any objects on the flushing list matching the write * domain we're flushing with our flush. @@ -1468,6 +1588,7 @@ i915_gem_retire_request(struct drm_device *dev, /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. */ + spin_lock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.active_list)) { struct drm_gem_object *obj; struct drm_i915_gem_object *obj_priv; @@ -1482,7 +1603,7 @@ i915_gem_retire_request(struct drm_device *dev, * this seqno. */ if (obj_priv->last_rendering_seqno != request->seqno) - return; + goto out; #if WATCH_LRU DRM_INFO("%s: retire %d moves to inactive list %p\n", @@ -1491,9 +1612,22 @@ i915_gem_retire_request(struct drm_device *dev, if (obj->write_domain != 0) i915_gem_object_move_to_flushing(obj); - else + else { + /* Take a reference on the object so it won't be + * freed while the spinlock is held. The list + * protection for this spinlock is safe when breaking + * the lock like this since the next thing we do + * is just get the head of the list again. + */ + drm_gem_object_reference(obj); i915_gem_object_move_to_inactive(obj); + spin_unlock(&dev_priv->mm.active_list_lock); + drm_gem_object_unreference(obj); + spin_lock(&dev_priv->mm.active_list_lock); + } } +out: + spin_unlock(&dev_priv->mm.active_list_lock); } /** @@ -1541,6 +1675,7 @@ i915_gem_retire_requests(struct drm_device *dev) i915_gem_retire_request(dev, request); list_del(&request->list); + list_del(&request->client_list); drm_free(request, sizeof(*request), DRM_MEM_DRIVER); } else break; @@ -1573,11 +1708,23 @@ static int i915_wait_request(struct drm_device *dev, uint32_t seqno) { drm_i915_private_t *dev_priv = dev->dev_private; + u32 ier; int ret = 0; BUG_ON(seqno == 0); if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { + if (IS_IGDNG(dev)) + ier = I915_READ(DEIER) | I915_READ(GTIER); + else + ier = I915_READ(IER); + if (!ier) { + DRM_ERROR("something (likely vbetool) disabled " + "interrupts, re-enabling\n"); + i915_driver_irq_preinstall(dev); + i915_driver_irq_postinstall(dev); + } + dev_priv->mm.waiting_gem_seqno = seqno; i915_user_irq_get(dev); ret = wait_event_interruptible(dev_priv->irq_queue, @@ -1622,8 +1769,7 @@ i915_gem_flush(struct drm_device *dev, if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); - if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU | - I915_GEM_DOMAIN_GTT)) { + if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { /* * read/write caches: * @@ -1845,7 +1991,7 @@ i915_gem_evict_something(struct drm_device *dev) i915_gem_flush(dev, obj->write_domain, obj->write_domain); - i915_add_request(dev, obj->write_domain); + i915_add_request(dev, NULL, obj->write_domain); obj = NULL; continue; @@ -1879,7 +2025,7 @@ i915_gem_evict_everything(struct drm_device *dev) return ret; } -static int +int i915_gem_object_get_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -1897,8 +2043,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) */ page_count = obj->size / PAGE_SIZE; BUG_ON(obj_priv->pages != NULL); - obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), - DRM_MEM_DRIVER); + obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { DRM_ERROR("Faled to allocate page list\n"); obj_priv->pages_refcount--; @@ -1917,6 +2062,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) } obj_priv->pages[i] = page; } + + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_do_bit_17_swizzle(obj); + return 0; } @@ -2000,8 +2149,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) return; } - pitch_val = (obj_priv->stride / 128) - 1; - WARN_ON(pitch_val & ~0x0000000f); + pitch_val = obj_priv->stride / 128; + pitch_val = ffs(pitch_val) - 1; + WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); + val = obj_priv->gtt_offset; if (obj_priv->tiling_mode == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; @@ -2111,7 +2262,7 @@ try_again: i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); - seqno = i915_add_request(dev, + seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); if (seqno == 0) return -ENOMEM; @@ -2123,9 +2274,6 @@ try_again: goto try_again; } - BUG_ON(old_obj_priv->active || - (reg->obj->write_domain & I915_GEM_GPU_DOMAINS)); - /* * Zap this virtual mapping so we can set up a fence again * for this object next time we need it. @@ -2215,15 +2363,20 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) } } if (obj_priv->gtt_space == NULL) { + bool lists_empty; + /* If the gtt is empty and we're still having trouble * fitting our object in, we're out of memory. */ #if WATCH_LRU DRM_INFO("%s: GTT full, evicting something\n", __func__); #endif - if (list_empty(&dev_priv->mm.inactive_list) && - list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list)) { + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + if (lists_empty) { DRM_ERROR("GTT full, but LRU list empty\n"); return -ENOMEM; } @@ -2270,8 +2423,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) * wasn't in the GTT, there shouldn't be any way it could have been in * a GPU cache */ - BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); - BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); + BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); + BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); return 0; } @@ -2288,6 +2441,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; + /* XXX: The 865 in particular appears to be weird in how it handles + * cache flushing. We haven't figured it out, but the + * clflush+agp_chipset_flush doesn't appear to successfully get the + * data visible to the PGU, while wbinvd + agp_chipset_flush does. + */ + if (IS_I865G(obj->dev)) { + wbinvd(); + return; + } + drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -2303,7 +2466,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) /* Queue the GPU write cache flushing we need. */ i915_gem_flush(dev, 0, obj->write_domain); - seqno = i915_add_request(dev, obj->write_domain); + seqno = i915_add_request(dev, NULL, obj->write_domain); obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); } @@ -2886,20 +3049,12 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, drm_i915_private_t *dev_priv = dev->dev_private; int nbox = exec->num_cliprects; int i = 0, count; - uint32_t exec_start, exec_len; + uint32_t exec_start, exec_len; RING_LOCALS; exec_start = (uint32_t) exec_offset + exec->batch_start_offset; exec_len = (uint32_t) exec->batch_len; - if ((exec_start | exec_len) & 0x7) { - DRM_ERROR("alignment\n"); - return -EINVAL; - } - - if (!exec_start) - return -EINVAL; - count = nbox ? nbox : 1; for (i = 0; i < count; i++) { @@ -2940,6 +3095,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, /* Throttle our rendering by waiting until the ring has completed our requests * emitted over 20 msec ago. * + * Note that if we were to use the current jiffies each time around the loop, + * we wouldn't escape the function with any frames outstanding if the time to + * render a frame was over 20ms. + * * This should get us reasonable parallelism between CPU and GPU but also * relatively low latency when blocking on a particular request to finish. */ @@ -2948,15 +3107,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) { struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; int ret = 0; - uint32_t seqno; + unsigned long recent_enough = jiffies - msecs_to_jiffies(20); mutex_lock(&dev->struct_mutex); - seqno = i915_file_priv->mm.last_gem_throttle_seqno; - i915_file_priv->mm.last_gem_throttle_seqno = - i915_file_priv->mm.last_gem_seqno; - if (seqno) - ret = i915_wait_request(dev, seqno); + while (!list_empty(&i915_file_priv->mm.request_list)) { + struct drm_i915_gem_request *request; + + request = list_first_entry(&i915_file_priv->mm.request_list, + struct drm_i915_gem_request, + client_list); + + if (time_after_eq(request->emitted_jiffies, recent_enough)) + break; + + ret = i915_wait_request(dev, request->seqno); + if (ret != 0) + break; + } mutex_unlock(&dev->struct_mutex); + return ret; } @@ -2975,7 +3144,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, reloc_count += exec_list[i].relocation_count; } - *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); + *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); if (*relocs == NULL) return -ENOMEM; @@ -2989,16 +3158,15 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, exec_list[i].relocation_count * sizeof(**relocs)); if (ret != 0) { - drm_free(*relocs, reloc_count * sizeof(**relocs), - DRM_MEM_DRIVER); + drm_free_large(*relocs); *relocs = NULL; - return ret; + return -EFAULT; } reloc_index += exec_list[i].relocation_count; } - return ret; + return 0; } static int @@ -3007,34 +3175,56 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, struct drm_i915_gem_relocation_entry *relocs) { uint32_t reloc_count = 0, i; - int ret; + int ret = 0; for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; + int unwritten; user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; - if (ret == 0) { - ret = copy_to_user(user_relocs, - &relocs[reloc_count], - exec_list[i].relocation_count * - sizeof(*relocs)); + unwritten = copy_to_user(user_relocs, + &relocs[reloc_count], + exec_list[i].relocation_count * + sizeof(*relocs)); + + if (unwritten) { + ret = -EFAULT; + goto err; } reloc_count += exec_list[i].relocation_count; } - drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); +err: + drm_free_large(relocs); return ret; } +static int +i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, + uint64_t exec_offset) +{ + uint32_t exec_start, exec_len; + + exec_start = (uint32_t) exec_offset + exec->batch_start_offset; + exec_len = (uint32_t) exec->batch_len; + + if ((exec_start | exec_len) & 0x7) + return -EINVAL; + + if (!exec_start) + return -EINVAL; + + return 0; +} + int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; @@ -3057,10 +3247,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } /* Copy in the exec list from userland */ - exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, - DRM_MEM_DRIVER); - object_list = drm_calloc(sizeof(*object_list), args->buffer_count, - DRM_MEM_DRIVER); + exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count); + object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count); if (exec_list == NULL || object_list == NULL) { DRM_ERROR("Failed to allocate exec or object list " "for %d buffers\n", @@ -3180,8 +3368,20 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* Set the pending read domains for the batch buffer to COMMAND */ batch_obj = object_list[args->buffer_count-1]; - batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; - batch_obj->pending_write_domain = 0; + if (batch_obj->pending_write_domain) { + DRM_ERROR("Attempting to use self-modifying batch buffer\n"); + ret = -EINVAL; + goto err; + } + batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + + /* Sanity check the batch buffer, prior to moving objects */ + exec_offset = exec_list[args->buffer_count - 1].offset; + ret = i915_gem_check_execbuffer (args, exec_offset); + if (ret != 0) { + DRM_ERROR("execbuf with invalid offset/length\n"); + goto err; + } i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3212,7 +3412,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, dev->invalidate_domains, dev->flush_domains); if (dev->flush_domains) - (void)i915_add_request(dev, dev->flush_domains); + (void)i915_add_request(dev, file_priv, + dev->flush_domains); } for (i = 0; i < args->buffer_count; i++) { @@ -3230,10 +3431,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } #endif - exec_offset = exec_list[args->buffer_count - 1].offset; - #if WATCH_EXEC - i915_gem_dump_object(object_list[args->buffer_count - 1], + i915_gem_dump_object(batch_obj, args->batch_len, __func__, ~0); @@ -3261,9 +3460,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, * *some* interrupts representing completion of buffers that we can * wait on when trying to clear up gtt space). */ - seqno = i915_add_request(dev, flush_domains); + seqno = i915_add_request(dev, file_priv, flush_domains); BUG_ON(seqno == 0); - i915_file_priv->mm.last_gem_seqno = seqno; for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; @@ -3298,10 +3496,12 @@ err: (uintptr_t) args->buffers_ptr, exec_list, sizeof(*exec_list) * args->buffer_count); - if (ret) + if (ret) { + ret = -EFAULT; DRM_ERROR("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); + } } /* Copy the updated relocations out regardless of current error @@ -3319,10 +3519,8 @@ err: } pre_mutex_err: - drm_free(object_list, sizeof(*object_list) * args->buffer_count, - DRM_MEM_DRIVER); - drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, - DRM_MEM_DRIVER); + drm_free_large(object_list); + drm_free_large(exec_list); drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, DRM_MEM_DRIVER); @@ -3369,8 +3567,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) atomic_inc(&dev->pin_count); atomic_add(obj->size, &dev->pin_memory); if (!obj_priv->active && - (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | - I915_GEM_DOMAIN_GTT)) == 0 && + (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 && !list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); } @@ -3397,8 +3594,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj) */ if (obj_priv->pin_count == 0) { if (!obj_priv->active && - (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | - I915_GEM_DOMAIN_GTT)) == 0) + (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); atomic_dec(&dev->pin_count); @@ -3502,15 +3698,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_gem_object *obj; struct drm_i915_gem_object *obj_priv; - mutex_lock(&dev->struct_mutex); obj = drm_gem_object_lookup(dev, file_priv, args->handle); if (obj == NULL) { DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", args->handle); - mutex_unlock(&dev->struct_mutex); return -EBADF; } + mutex_lock(&dev->struct_mutex); /* Update the active list for the hardware's current position. * Otherwise this only updates on a delayed timer or when irqs are * actually unmasked, and our working set ends up being larger than @@ -3583,6 +3778,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); + kfree(obj_priv->bit_17); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } @@ -3648,9 +3844,8 @@ i915_gem_idle(struct drm_device *dev) /* Flush the GPU along with all non-CPU write domains */ - i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), - ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); - seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU); + i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); if (seqno == 0) { mutex_unlock(&dev->struct_mutex); @@ -3679,6 +3874,7 @@ i915_gem_idle(struct drm_device *dev) i915_gem_retire_requests(dev); + spin_lock(&dev_priv->mm.active_list_lock); if (!dev_priv->mm.wedged) { /* Active and flushing should now be empty as we've * waited for a sequence higher than any pending execbuffer @@ -3705,6 +3901,7 @@ i915_gem_idle(struct drm_device *dev) obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; i915_gem_object_move_to_inactive(obj_priv->obj); } + spin_unlock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.flushing_list)) { struct drm_i915_gem_object *obj_priv; @@ -3950,10 +4147,15 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, dev_priv->mm.suspended = 0; ret = i915_gem_init_ringbuffer(dev); - if (ret != 0) + if (ret != 0) { + mutex_unlock(&dev->struct_mutex); return ret; + } + spin_lock(&dev_priv->mm.active_list_lock); BUG_ON(!list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); BUG_ON(!list_empty(&dev_priv->mm.request_list)); @@ -3997,6 +4199,7 @@ i915_gem_load(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; + spin_lock_init(&dev_priv->mm.active_list_lock); INIT_LIST_HEAD(&dev_priv->mm.active_list); INIT_LIST_HEAD(&dev_priv->mm.flushing_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); @@ -4192,3 +4395,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, drm_agp_chipset_flush(dev); return 0; } + +void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) +{ + struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; + + /* Clean up our request list when the client is going away, so that + * later retire_requests won't dereference our soon-to-be-gone + * file_priv. + */ + mutex_lock(&dev->struct_mutex); + while (!list_empty(&i915_file_priv->mm.request_list)) + list_del_init(i915_file_priv->mm.request_list.next); + mutex_unlock(&dev->struct_mutex); +}