i915: Fix up ring initialization to cover G45 oddities
[safe/jmp/linux-2.6] / drivers / gpu / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include <linux/swap.h>
33
34 static int
35 i915_gem_object_set_domain(struct drm_gem_object *obj,
36                             uint32_t read_domains,
37                             uint32_t write_domain);
38 static int
39 i915_gem_object_set_domain_range(struct drm_gem_object *obj,
40                                  uint64_t offset,
41                                  uint64_t size,
42                                  uint32_t read_domains,
43                                  uint32_t write_domain);
44 static int
45 i915_gem_set_domain(struct drm_gem_object *obj,
46                     struct drm_file *file_priv,
47                     uint32_t read_domains,
48                     uint32_t write_domain);
49 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
50 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
51 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
52
53 int
54 i915_gem_init_ioctl(struct drm_device *dev, void *data,
55                     struct drm_file *file_priv)
56 {
57         drm_i915_private_t *dev_priv = dev->dev_private;
58         struct drm_i915_gem_init *args = data;
59
60         mutex_lock(&dev->struct_mutex);
61
62         if (args->gtt_start >= args->gtt_end ||
63             (args->gtt_start & (PAGE_SIZE - 1)) != 0 ||
64             (args->gtt_end & (PAGE_SIZE - 1)) != 0) {
65                 mutex_unlock(&dev->struct_mutex);
66                 return -EINVAL;
67         }
68
69         drm_mm_init(&dev_priv->mm.gtt_space, args->gtt_start,
70             args->gtt_end - args->gtt_start);
71
72         dev->gtt_total = (uint32_t) (args->gtt_end - args->gtt_start);
73
74         mutex_unlock(&dev->struct_mutex);
75
76         return 0;
77 }
78
79
80 /**
81  * Creates a new mm object and returns a handle to it.
82  */
83 int
84 i915_gem_create_ioctl(struct drm_device *dev, void *data,
85                       struct drm_file *file_priv)
86 {
87         struct drm_i915_gem_create *args = data;
88         struct drm_gem_object *obj;
89         int handle, ret;
90
91         args->size = roundup(args->size, PAGE_SIZE);
92
93         /* Allocate the new object */
94         obj = drm_gem_object_alloc(dev, args->size);
95         if (obj == NULL)
96                 return -ENOMEM;
97
98         ret = drm_gem_handle_create(file_priv, obj, &handle);
99         mutex_lock(&dev->struct_mutex);
100         drm_gem_object_handle_unreference(obj);
101         mutex_unlock(&dev->struct_mutex);
102
103         if (ret)
104                 return ret;
105
106         args->handle = handle;
107
108         return 0;
109 }
110
111 /**
112  * Reads data from the object referenced by handle.
113  *
114  * On error, the contents of *data are undefined.
115  */
116 int
117 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
118                      struct drm_file *file_priv)
119 {
120         struct drm_i915_gem_pread *args = data;
121         struct drm_gem_object *obj;
122         struct drm_i915_gem_object *obj_priv;
123         ssize_t read;
124         loff_t offset;
125         int ret;
126
127         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
128         if (obj == NULL)
129                 return -EBADF;
130         obj_priv = obj->driver_private;
131
132         /* Bounds check source.
133          *
134          * XXX: This could use review for overflow issues...
135          */
136         if (args->offset > obj->size || args->size > obj->size ||
137             args->offset + args->size > obj->size) {
138                 drm_gem_object_unreference(obj);
139                 return -EINVAL;
140         }
141
142         mutex_lock(&dev->struct_mutex);
143
144         ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
145                                                I915_GEM_DOMAIN_CPU, 0);
146         if (ret != 0) {
147                 drm_gem_object_unreference(obj);
148                 mutex_unlock(&dev->struct_mutex);
149                 return ret;
150         }
151
152         offset = args->offset;
153
154         read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr,
155                         args->size, &offset);
156         if (read != args->size) {
157                 drm_gem_object_unreference(obj);
158                 mutex_unlock(&dev->struct_mutex);
159                 if (read < 0)
160                         return read;
161                 else
162                         return -EINVAL;
163         }
164
165         drm_gem_object_unreference(obj);
166         mutex_unlock(&dev->struct_mutex);
167
168         return 0;
169 }
170
171 static int
172 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
173                     struct drm_i915_gem_pwrite *args,
174                     struct drm_file *file_priv)
175 {
176         struct drm_i915_gem_object *obj_priv = obj->driver_private;
177         ssize_t remain;
178         loff_t offset;
179         char __user *user_data;
180         char __iomem *vaddr;
181         char *vaddr_atomic;
182         int i, o, l;
183         int ret = 0;
184         unsigned long pfn;
185         unsigned long unwritten;
186
187         user_data = (char __user *) (uintptr_t) args->data_ptr;
188         remain = args->size;
189         if (!access_ok(VERIFY_READ, user_data, remain))
190                 return -EFAULT;
191
192
193         mutex_lock(&dev->struct_mutex);
194         ret = i915_gem_object_pin(obj, 0);
195         if (ret) {
196                 mutex_unlock(&dev->struct_mutex);
197                 return ret;
198         }
199         ret = i915_gem_set_domain(obj, file_priv,
200                                   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
201         if (ret)
202                 goto fail;
203
204         obj_priv = obj->driver_private;
205         offset = obj_priv->gtt_offset + args->offset;
206         obj_priv->dirty = 1;
207
208         while (remain > 0) {
209                 /* Operation in this page
210                  *
211                  * i = page number
212                  * o = offset within page
213                  * l = bytes to copy
214                  */
215                 i = offset >> PAGE_SHIFT;
216                 o = offset & (PAGE_SIZE-1);
217                 l = remain;
218                 if ((o + l) > PAGE_SIZE)
219                         l = PAGE_SIZE - o;
220
221                 pfn = (dev->agp->base >> PAGE_SHIFT) + i;
222
223 #ifdef CONFIG_HIGHMEM
224                 /* This is a workaround for the low performance of iounmap
225                  * (approximate 10% cpu cost on normal 3D workloads).
226                  * kmap_atomic on HIGHMEM kernels happens to let us map card
227                  * memory without taking IPIs.  When the vmap rework lands
228                  * we should be able to dump this hack.
229                  */
230                 vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
231 #if WATCH_PWRITE
232                 DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
233                          i, o, l, pfn, vaddr_atomic);
234 #endif
235                 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
236                                                               user_data, l);
237                 kunmap_atomic(vaddr_atomic, KM_USER0);
238
239                 if (unwritten)
240 #endif /* CONFIG_HIGHMEM */
241                 {
242                         vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
243 #if WATCH_PWRITE
244                         DRM_INFO("pwrite slow i %d o %d l %d "
245                                  "pfn %ld vaddr %p\n",
246                                  i, o, l, pfn, vaddr);
247 #endif
248                         if (vaddr == NULL) {
249                                 ret = -EFAULT;
250                                 goto fail;
251                         }
252                         unwritten = __copy_from_user(vaddr + o, user_data, l);
253 #if WATCH_PWRITE
254                         DRM_INFO("unwritten %ld\n", unwritten);
255 #endif
256                         iounmap(vaddr);
257                         if (unwritten) {
258                                 ret = -EFAULT;
259                                 goto fail;
260                         }
261                 }
262
263                 remain -= l;
264                 user_data += l;
265                 offset += l;
266         }
267 #if WATCH_PWRITE && 1
268         i915_gem_clflush_object(obj);
269         i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
270         i915_gem_clflush_object(obj);
271 #endif
272
273 fail:
274         i915_gem_object_unpin(obj);
275         mutex_unlock(&dev->struct_mutex);
276
277         return ret;
278 }
279
280 static int
281 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
282                       struct drm_i915_gem_pwrite *args,
283                       struct drm_file *file_priv)
284 {
285         int ret;
286         loff_t offset;
287         ssize_t written;
288
289         mutex_lock(&dev->struct_mutex);
290
291         ret = i915_gem_set_domain(obj, file_priv,
292                                   I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
293         if (ret) {
294                 mutex_unlock(&dev->struct_mutex);
295                 return ret;
296         }
297
298         offset = args->offset;
299
300         written = vfs_write(obj->filp,
301                             (char __user *)(uintptr_t) args->data_ptr,
302                             args->size, &offset);
303         if (written != args->size) {
304                 mutex_unlock(&dev->struct_mutex);
305                 if (written < 0)
306                         return written;
307                 else
308                         return -EINVAL;
309         }
310
311         mutex_unlock(&dev->struct_mutex);
312
313         return 0;
314 }
315
316 /**
317  * Writes data to the object referenced by handle.
318  *
319  * On error, the contents of the buffer that were to be modified are undefined.
320  */
321 int
322 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
323                       struct drm_file *file_priv)
324 {
325         struct drm_i915_gem_pwrite *args = data;
326         struct drm_gem_object *obj;
327         struct drm_i915_gem_object *obj_priv;
328         int ret = 0;
329
330         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
331         if (obj == NULL)
332                 return -EBADF;
333         obj_priv = obj->driver_private;
334
335         /* Bounds check destination.
336          *
337          * XXX: This could use review for overflow issues...
338          */
339         if (args->offset > obj->size || args->size > obj->size ||
340             args->offset + args->size > obj->size) {
341                 drm_gem_object_unreference(obj);
342                 return -EINVAL;
343         }
344
345         /* We can only do the GTT pwrite on untiled buffers, as otherwise
346          * it would end up going through the fenced access, and we'll get
347          * different detiling behavior between reading and writing.
348          * pread/pwrite currently are reading and writing from the CPU
349          * perspective, requiring manual detiling by the client.
350          */
351         if (obj_priv->tiling_mode == I915_TILING_NONE &&
352             dev->gtt_total != 0)
353                 ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
354         else
355                 ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
356
357 #if WATCH_PWRITE
358         if (ret)
359                 DRM_INFO("pwrite failed %d\n", ret);
360 #endif
361
362         drm_gem_object_unreference(obj);
363
364         return ret;
365 }
366
367 /**
368  * Called when user space prepares to use an object
369  */
370 int
371 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
372                           struct drm_file *file_priv)
373 {
374         struct drm_i915_gem_set_domain *args = data;
375         struct drm_gem_object *obj;
376         int ret;
377
378         if (!(dev->driver->driver_features & DRIVER_GEM))
379                 return -ENODEV;
380
381         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
382         if (obj == NULL)
383                 return -EBADF;
384
385         mutex_lock(&dev->struct_mutex);
386 #if WATCH_BUF
387         DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
388                  obj, obj->size, args->read_domains, args->write_domain);
389 #endif
390         ret = i915_gem_set_domain(obj, file_priv,
391                                   args->read_domains, args->write_domain);
392         drm_gem_object_unreference(obj);
393         mutex_unlock(&dev->struct_mutex);
394         return ret;
395 }
396
397 /**
398  * Called when user space has done writes to this buffer
399  */
400 int
401 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
402                       struct drm_file *file_priv)
403 {
404         struct drm_i915_gem_sw_finish *args = data;
405         struct drm_gem_object *obj;
406         struct drm_i915_gem_object *obj_priv;
407         int ret = 0;
408
409         if (!(dev->driver->driver_features & DRIVER_GEM))
410                 return -ENODEV;
411
412         mutex_lock(&dev->struct_mutex);
413         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
414         if (obj == NULL) {
415                 mutex_unlock(&dev->struct_mutex);
416                 return -EBADF;
417         }
418
419 #if WATCH_BUF
420         DRM_INFO("%s: sw_finish %d (%p %d)\n",
421                  __func__, args->handle, obj, obj->size);
422 #endif
423         obj_priv = obj->driver_private;
424
425         /* Pinned buffers may be scanout, so flush the cache */
426         if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
427                 i915_gem_clflush_object(obj);
428                 drm_agp_chipset_flush(dev);
429         }
430         drm_gem_object_unreference(obj);
431         mutex_unlock(&dev->struct_mutex);
432         return ret;
433 }
434
435 /**
436  * Maps the contents of an object, returning the address it is mapped
437  * into.
438  *
439  * While the mapping holds a reference on the contents of the object, it doesn't
440  * imply a ref on the object itself.
441  */
442 int
443 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
444                    struct drm_file *file_priv)
445 {
446         struct drm_i915_gem_mmap *args = data;
447         struct drm_gem_object *obj;
448         loff_t offset;
449         unsigned long addr;
450
451         if (!(dev->driver->driver_features & DRIVER_GEM))
452                 return -ENODEV;
453
454         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
455         if (obj == NULL)
456                 return -EBADF;
457
458         offset = args->offset;
459
460         down_write(&current->mm->mmap_sem);
461         addr = do_mmap(obj->filp, 0, args->size,
462                        PROT_READ | PROT_WRITE, MAP_SHARED,
463                        args->offset);
464         up_write(&current->mm->mmap_sem);
465         mutex_lock(&dev->struct_mutex);
466         drm_gem_object_unreference(obj);
467         mutex_unlock(&dev->struct_mutex);
468         if (IS_ERR((void *)addr))
469                 return addr;
470
471         args->addr_ptr = (uint64_t) addr;
472
473         return 0;
474 }
475
476 static void
477 i915_gem_object_free_page_list(struct drm_gem_object *obj)
478 {
479         struct drm_i915_gem_object *obj_priv = obj->driver_private;
480         int page_count = obj->size / PAGE_SIZE;
481         int i;
482
483         if (obj_priv->page_list == NULL)
484                 return;
485
486
487         for (i = 0; i < page_count; i++)
488                 if (obj_priv->page_list[i] != NULL) {
489                         if (obj_priv->dirty)
490                                 set_page_dirty(obj_priv->page_list[i]);
491                         mark_page_accessed(obj_priv->page_list[i]);
492                         page_cache_release(obj_priv->page_list[i]);
493                 }
494         obj_priv->dirty = 0;
495
496         drm_free(obj_priv->page_list,
497                  page_count * sizeof(struct page *),
498                  DRM_MEM_DRIVER);
499         obj_priv->page_list = NULL;
500 }
501
502 static void
503 i915_gem_object_move_to_active(struct drm_gem_object *obj)
504 {
505         struct drm_device *dev = obj->dev;
506         drm_i915_private_t *dev_priv = dev->dev_private;
507         struct drm_i915_gem_object *obj_priv = obj->driver_private;
508
509         /* Add a reference if we're newly entering the active list. */
510         if (!obj_priv->active) {
511                 drm_gem_object_reference(obj);
512                 obj_priv->active = 1;
513         }
514         /* Move from whatever list we were on to the tail of execution. */
515         list_move_tail(&obj_priv->list,
516                        &dev_priv->mm.active_list);
517 }
518
519
520 static void
521 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
522 {
523         struct drm_device *dev = obj->dev;
524         drm_i915_private_t *dev_priv = dev->dev_private;
525         struct drm_i915_gem_object *obj_priv = obj->driver_private;
526
527         i915_verify_inactive(dev, __FILE__, __LINE__);
528         if (obj_priv->pin_count != 0)
529                 list_del_init(&obj_priv->list);
530         else
531                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
532
533         if (obj_priv->active) {
534                 obj_priv->active = 0;
535                 drm_gem_object_unreference(obj);
536         }
537         i915_verify_inactive(dev, __FILE__, __LINE__);
538 }
539
540 /**
541  * Creates a new sequence number, emitting a write of it to the status page
542  * plus an interrupt, which will trigger i915_user_interrupt_handler.
543  *
544  * Must be called with struct_lock held.
545  *
546  * Returned sequence numbers are nonzero on success.
547  */
548 static uint32_t
549 i915_add_request(struct drm_device *dev, uint32_t flush_domains)
550 {
551         drm_i915_private_t *dev_priv = dev->dev_private;
552         struct drm_i915_gem_request *request;
553         uint32_t seqno;
554         int was_empty;
555         RING_LOCALS;
556
557         request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
558         if (request == NULL)
559                 return 0;
560
561         /* Grab the seqno we're going to make this request be, and bump the
562          * next (skipping 0 so it can be the reserved no-seqno value).
563          */
564         seqno = dev_priv->mm.next_gem_seqno;
565         dev_priv->mm.next_gem_seqno++;
566         if (dev_priv->mm.next_gem_seqno == 0)
567                 dev_priv->mm.next_gem_seqno++;
568
569         BEGIN_LP_RING(4);
570         OUT_RING(MI_STORE_DWORD_INDEX);
571         OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
572         OUT_RING(seqno);
573
574         OUT_RING(MI_USER_INTERRUPT);
575         ADVANCE_LP_RING();
576
577         DRM_DEBUG("%d\n", seqno);
578
579         request->seqno = seqno;
580         request->emitted_jiffies = jiffies;
581         request->flush_domains = flush_domains;
582         was_empty = list_empty(&dev_priv->mm.request_list);
583         list_add_tail(&request->list, &dev_priv->mm.request_list);
584
585         if (was_empty)
586                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
587         return seqno;
588 }
589
590 /**
591  * Command execution barrier
592  *
593  * Ensures that all commands in the ring are finished
594  * before signalling the CPU
595  */
596 static uint32_t
597 i915_retire_commands(struct drm_device *dev)
598 {
599         drm_i915_private_t *dev_priv = dev->dev_private;
600         uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
601         uint32_t flush_domains = 0;
602         RING_LOCALS;
603
604         /* The sampler always gets flushed on i965 (sigh) */
605         if (IS_I965G(dev))
606                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
607         BEGIN_LP_RING(2);
608         OUT_RING(cmd);
609         OUT_RING(0); /* noop */
610         ADVANCE_LP_RING();
611         return flush_domains;
612 }
613
614 /**
615  * Moves buffers associated only with the given active seqno from the active
616  * to inactive list, potentially freeing them.
617  */
618 static void
619 i915_gem_retire_request(struct drm_device *dev,
620                         struct drm_i915_gem_request *request)
621 {
622         drm_i915_private_t *dev_priv = dev->dev_private;
623
624         /* Move any buffers on the active list that are no longer referenced
625          * by the ringbuffer to the flushing/inactive lists as appropriate.
626          */
627         while (!list_empty(&dev_priv->mm.active_list)) {
628                 struct drm_gem_object *obj;
629                 struct drm_i915_gem_object *obj_priv;
630
631                 obj_priv = list_first_entry(&dev_priv->mm.active_list,
632                                             struct drm_i915_gem_object,
633                                             list);
634                 obj = obj_priv->obj;
635
636                 /* If the seqno being retired doesn't match the oldest in the
637                  * list, then the oldest in the list must still be newer than
638                  * this seqno.
639                  */
640                 if (obj_priv->last_rendering_seqno != request->seqno)
641                         return;
642 #if WATCH_LRU
643                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
644                          __func__, request->seqno, obj);
645 #endif
646
647                 if (obj->write_domain != 0) {
648                         list_move_tail(&obj_priv->list,
649                                        &dev_priv->mm.flushing_list);
650                 } else {
651                         i915_gem_object_move_to_inactive(obj);
652                 }
653         }
654
655         if (request->flush_domains != 0) {
656                 struct drm_i915_gem_object *obj_priv, *next;
657
658                 /* Clear the write domain and activity from any buffers
659                  * that are just waiting for a flush matching the one retired.
660                  */
661                 list_for_each_entry_safe(obj_priv, next,
662                                          &dev_priv->mm.flushing_list, list) {
663                         struct drm_gem_object *obj = obj_priv->obj;
664
665                         if (obj->write_domain & request->flush_domains) {
666                                 obj->write_domain = 0;
667                                 i915_gem_object_move_to_inactive(obj);
668                         }
669                 }
670
671         }
672 }
673
/**
 * Returns true if seq1 is the same as or later than seq2.
 *
 * The comparison is done on the signed 32-bit difference, so it keeps
 * working when the sequence counter wraps around.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	int32_t delta = (int32_t)(seq1 - seq2);

	return delta >= 0;
}
682
683 uint32_t
684 i915_get_gem_seqno(struct drm_device *dev)
685 {
686         drm_i915_private_t *dev_priv = dev->dev_private;
687
688         return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
689 }
690
691 /**
692  * This function clears the request list as sequence numbers are passed.
693  */
694 void
695 i915_gem_retire_requests(struct drm_device *dev)
696 {
697         drm_i915_private_t *dev_priv = dev->dev_private;
698         uint32_t seqno;
699
700         seqno = i915_get_gem_seqno(dev);
701
702         while (!list_empty(&dev_priv->mm.request_list)) {
703                 struct drm_i915_gem_request *request;
704                 uint32_t retiring_seqno;
705
706                 request = list_first_entry(&dev_priv->mm.request_list,
707                                            struct drm_i915_gem_request,
708                                            list);
709                 retiring_seqno = request->seqno;
710
711                 if (i915_seqno_passed(seqno, retiring_seqno) ||
712                     dev_priv->mm.wedged) {
713                         i915_gem_retire_request(dev, request);
714
715                         list_del(&request->list);
716                         drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
717                 } else
718                         break;
719         }
720 }
721
722 void
723 i915_gem_retire_work_handler(struct work_struct *work)
724 {
725         drm_i915_private_t *dev_priv;
726         struct drm_device *dev;
727
728         dev_priv = container_of(work, drm_i915_private_t,
729                                 mm.retire_work.work);
730         dev = dev_priv->dev;
731
732         mutex_lock(&dev->struct_mutex);
733         i915_gem_retire_requests(dev);
734         if (!list_empty(&dev_priv->mm.request_list))
735                 schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
736         mutex_unlock(&dev->struct_mutex);
737 }
738
739 /**
740  * Waits for a sequence number to be signaled, and cleans up the
741  * request and object lists appropriately for that event.
742  */
743 static int
744 i915_wait_request(struct drm_device *dev, uint32_t seqno)
745 {
746         drm_i915_private_t *dev_priv = dev->dev_private;
747         int ret = 0;
748
749         BUG_ON(seqno == 0);
750
751         if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
752                 dev_priv->mm.waiting_gem_seqno = seqno;
753                 i915_user_irq_get(dev);
754                 ret = wait_event_interruptible(dev_priv->irq_queue,
755                                                i915_seqno_passed(i915_get_gem_seqno(dev),
756                                                                  seqno) ||
757                                                dev_priv->mm.wedged);
758                 i915_user_irq_put(dev);
759                 dev_priv->mm.waiting_gem_seqno = 0;
760         }
761         if (dev_priv->mm.wedged)
762                 ret = -EIO;
763
764         if (ret && ret != -ERESTARTSYS)
765                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
766                           __func__, ret, seqno, i915_get_gem_seqno(dev));
767
768         /* Directly dispatch request retiring.  While we have the work queue
769          * to handle this, the waiter on a request often wants an associated
770          * buffer to have made it to the inactive list, and we would need
771          * a separate wait queue to handle that.
772          */
773         if (ret == 0)
774                 i915_gem_retire_requests(dev);
775
776         return ret;
777 }
778
779 static void
780 i915_gem_flush(struct drm_device *dev,
781                uint32_t invalidate_domains,
782                uint32_t flush_domains)
783 {
784         drm_i915_private_t *dev_priv = dev->dev_private;
785         uint32_t cmd;
786         RING_LOCALS;
787
788 #if WATCH_EXEC
789         DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
790                   invalidate_domains, flush_domains);
791 #endif
792
793         if (flush_domains & I915_GEM_DOMAIN_CPU)
794                 drm_agp_chipset_flush(dev);
795
796         if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
797                                                      I915_GEM_DOMAIN_GTT)) {
798                 /*
799                  * read/write caches:
800                  *
801                  * I915_GEM_DOMAIN_RENDER is always invalidated, but is
802                  * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
803                  * also flushed at 2d versus 3d pipeline switches.
804                  *
805                  * read-only caches:
806                  *
807                  * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
808                  * MI_READ_FLUSH is set, and is always flushed on 965.
809                  *
810                  * I915_GEM_DOMAIN_COMMAND may not exist?
811                  *
812                  * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
813                  * invalidated when MI_EXE_FLUSH is set.
814                  *
815                  * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
816                  * invalidated with every MI_FLUSH.
817                  *
818                  * TLBs:
819                  *
820                  * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
821                  * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
822                  * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
823                  * are flushed at any MI_FLUSH.
824                  */
825
826                 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
827                 if ((invalidate_domains|flush_domains) &
828                     I915_GEM_DOMAIN_RENDER)
829                         cmd &= ~MI_NO_WRITE_FLUSH;
830                 if (!IS_I965G(dev)) {
831                         /*
832                          * On the 965, the sampler cache always gets flushed
833                          * and this bit is reserved.
834                          */
835                         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
836                                 cmd |= MI_READ_FLUSH;
837                 }
838                 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
839                         cmd |= MI_EXE_FLUSH;
840
841 #if WATCH_EXEC
842                 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
843 #endif
844                 BEGIN_LP_RING(2);
845                 OUT_RING(cmd);
846                 OUT_RING(0); /* noop */
847                 ADVANCE_LP_RING();
848         }
849 }
850
851 /**
852  * Ensures that all rendering to the object has completed and the object is
853  * safe to unbind from the GTT or access from the CPU.
854  */
855 static int
856 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
857 {
858         struct drm_device *dev = obj->dev;
859         struct drm_i915_gem_object *obj_priv = obj->driver_private;
860         int ret;
861
862         /* If there are writes queued to the buffer, flush and
863          * create a new seqno to wait for.
864          */
865         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
866                 uint32_t write_domain = obj->write_domain;
867 #if WATCH_BUF
868                 DRM_INFO("%s: flushing object %p from write domain %08x\n",
869                           __func__, obj, write_domain);
870 #endif
871                 i915_gem_flush(dev, 0, write_domain);
872
873                 i915_gem_object_move_to_active(obj);
874                 obj_priv->last_rendering_seqno = i915_add_request(dev,
875                                                                   write_domain);
876                 BUG_ON(obj_priv->last_rendering_seqno == 0);
877 #if WATCH_LRU
878                 DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
879 #endif
880         }
881
882         /* If there is rendering queued on the buffer being evicted, wait for
883          * it.
884          */
885         if (obj_priv->active) {
886 #if WATCH_BUF
887                 DRM_INFO("%s: object %p wait for seqno %08x\n",
888                           __func__, obj, obj_priv->last_rendering_seqno);
889 #endif
890                 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
891                 if (ret != 0)
892                         return ret;
893         }
894
895         return 0;
896 }
897
898 /**
899  * Unbinds an object from the GTT aperture.
900  */
901 static int
902 i915_gem_object_unbind(struct drm_gem_object *obj)
903 {
904         struct drm_device *dev = obj->dev;
905         struct drm_i915_gem_object *obj_priv = obj->driver_private;
906         int ret = 0;
907
908 #if WATCH_BUF
909         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
910         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
911 #endif
912         if (obj_priv->gtt_space == NULL)
913                 return 0;
914
915         if (obj_priv->pin_count != 0) {
916                 DRM_ERROR("Attempting to unbind pinned buffer\n");
917                 return -EINVAL;
918         }
919
920         /* Wait for any rendering to complete
921          */
922         ret = i915_gem_object_wait_rendering(obj);
923         if (ret) {
924                 DRM_ERROR("wait_rendering failed: %d\n", ret);
925                 return ret;
926         }
927
928         /* Move the object to the CPU domain to ensure that
929          * any possible CPU writes while it's not in the GTT
930          * are flushed when we go to remap it. This will
931          * also ensure that all pending GPU writes are finished
932          * before we unbind.
933          */
934         ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
935                                          I915_GEM_DOMAIN_CPU);
936         if (ret) {
937                 DRM_ERROR("set_domain failed: %d\n", ret);
938                 return ret;
939         }
940
941         if (obj_priv->agp_mem != NULL) {
942                 drm_unbind_agp(obj_priv->agp_mem);
943                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
944                 obj_priv->agp_mem = NULL;
945         }
946
947         BUG_ON(obj_priv->active);
948
949         i915_gem_object_free_page_list(obj);
950
951         if (obj_priv->gtt_space) {
952                 atomic_dec(&dev->gtt_count);
953                 atomic_sub(obj->size, &dev->gtt_memory);
954
955                 drm_mm_put_block(obj_priv->gtt_space);
956                 obj_priv->gtt_space = NULL;
957         }
958
959         /* Remove ourselves from the LRU list if present. */
960         if (!list_empty(&obj_priv->list))
961                 list_del_init(&obj_priv->list);
962
963         return 0;
964 }
965
966 static int
967 i915_gem_evict_something(struct drm_device *dev)
968 {
969         drm_i915_private_t *dev_priv = dev->dev_private;
970         struct drm_gem_object *obj;
971         struct drm_i915_gem_object *obj_priv;
972         int ret = 0;
973
974         for (;;) {
975                 /* If there's an inactive buffer available now, grab it
976                  * and be done.
977                  */
978                 if (!list_empty(&dev_priv->mm.inactive_list)) {
979                         obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
980                                                     struct drm_i915_gem_object,
981                                                     list);
982                         obj = obj_priv->obj;
983                         BUG_ON(obj_priv->pin_count != 0);
984 #if WATCH_LRU
985                         DRM_INFO("%s: evicting %p\n", __func__, obj);
986 #endif
987                         BUG_ON(obj_priv->active);
988
989                         /* Wait on the rendering and unbind the buffer. */
990                         ret = i915_gem_object_unbind(obj);
991                         break;
992                 }
993
994                 /* If we didn't get anything, but the ring is still processing
995                  * things, wait for one of those things to finish and hopefully
996                  * leave us a buffer to evict.
997                  */
998                 if (!list_empty(&dev_priv->mm.request_list)) {
999                         struct drm_i915_gem_request *request;
1000
1001                         request = list_first_entry(&dev_priv->mm.request_list,
1002                                                    struct drm_i915_gem_request,
1003                                                    list);
1004
1005                         ret = i915_wait_request(dev, request->seqno);
1006                         if (ret)
1007                                 break;
1008
1009                         /* if waiting caused an object to become inactive,
1010                          * then loop around and wait for it. Otherwise, we
1011                          * assume that waiting freed and unbound something,
1012                          * so there should now be some space in the GTT
1013                          */
1014                         if (!list_empty(&dev_priv->mm.inactive_list))
1015                                 continue;
1016                         break;
1017                 }
1018
1019                 /* If we didn't have anything on the request list but there
1020                  * are buffers awaiting a flush, emit one and try again.
1021                  * When we wait on it, those buffers waiting for that flush
1022                  * will get moved to inactive.
1023                  */
1024                 if (!list_empty(&dev_priv->mm.flushing_list)) {
1025                         obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1026                                                     struct drm_i915_gem_object,
1027                                                     list);
1028                         obj = obj_priv->obj;
1029
1030                         i915_gem_flush(dev,
1031                                        obj->write_domain,
1032                                        obj->write_domain);
1033                         i915_add_request(dev, obj->write_domain);
1034
1035                         obj = NULL;
1036                         continue;
1037                 }
1038
1039                 DRM_ERROR("inactive empty %d request empty %d "
1040                           "flushing empty %d\n",
1041                           list_empty(&dev_priv->mm.inactive_list),
1042                           list_empty(&dev_priv->mm.request_list),
1043                           list_empty(&dev_priv->mm.flushing_list));
1044                 /* If we didn't do any of the above, there's nothing to be done
1045                  * and we just can't fit it in.
1046                  */
1047                 return -ENOMEM;
1048         }
1049         return ret;
1050 }
1051
1052 static int
1053 i915_gem_object_get_page_list(struct drm_gem_object *obj)
1054 {
1055         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1056         int page_count, i;
1057         struct address_space *mapping;
1058         struct inode *inode;
1059         struct page *page;
1060         int ret;
1061
1062         if (obj_priv->page_list)
1063                 return 0;
1064
1065         /* Get the list of pages out of our struct file.  They'll be pinned
1066          * at this point until we release them.
1067          */
1068         page_count = obj->size / PAGE_SIZE;
1069         BUG_ON(obj_priv->page_list != NULL);
1070         obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *),
1071                                          DRM_MEM_DRIVER);
1072         if (obj_priv->page_list == NULL) {
1073                 DRM_ERROR("Faled to allocate page list\n");
1074                 return -ENOMEM;
1075         }
1076
1077         inode = obj->filp->f_path.dentry->d_inode;
1078         mapping = inode->i_mapping;
1079         for (i = 0; i < page_count; i++) {
1080                 page = read_mapping_page(mapping, i, NULL);
1081                 if (IS_ERR(page)) {
1082                         ret = PTR_ERR(page);
1083                         DRM_ERROR("read_mapping_page failed: %d\n", ret);
1084                         i915_gem_object_free_page_list(obj);
1085                         return ret;
1086                 }
1087                 obj_priv->page_list[i] = page;
1088         }
1089         return 0;
1090 }
1091
1092 /**
1093  * Finds free space in the GTT aperture and binds the object there.
1094  */
1095 static int
1096 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
1097 {
1098         struct drm_device *dev = obj->dev;
1099         drm_i915_private_t *dev_priv = dev->dev_private;
1100         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1101         struct drm_mm_node *free_space;
1102         int page_count, ret;
1103
1104         if (alignment == 0)
1105                 alignment = PAGE_SIZE;
1106         if (alignment & (PAGE_SIZE - 1)) {
1107                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
1108                 return -EINVAL;
1109         }
1110
1111  search_free:
1112         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
1113                                         obj->size, alignment, 0);
1114         if (free_space != NULL) {
1115                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
1116                                                        alignment);
1117                 if (obj_priv->gtt_space != NULL) {
1118                         obj_priv->gtt_space->private = obj;
1119                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
1120                 }
1121         }
1122         if (obj_priv->gtt_space == NULL) {
1123                 /* If the gtt is empty and we're still having trouble
1124                  * fitting our object in, we're out of memory.
1125                  */
1126 #if WATCH_LRU
1127                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
1128 #endif
1129                 if (list_empty(&dev_priv->mm.inactive_list) &&
1130                     list_empty(&dev_priv->mm.flushing_list) &&
1131                     list_empty(&dev_priv->mm.active_list)) {
1132                         DRM_ERROR("GTT full, but LRU list empty\n");
1133                         return -ENOMEM;
1134                 }
1135
1136                 ret = i915_gem_evict_something(dev);
1137                 if (ret != 0) {
1138                         DRM_ERROR("Failed to evict a buffer %d\n", ret);
1139                         return ret;
1140                 }
1141                 goto search_free;
1142         }
1143
1144 #if WATCH_BUF
1145         DRM_INFO("Binding object of size %d at 0x%08x\n",
1146                  obj->size, obj_priv->gtt_offset);
1147 #endif
1148         ret = i915_gem_object_get_page_list(obj);
1149         if (ret) {
1150                 drm_mm_put_block(obj_priv->gtt_space);
1151                 obj_priv->gtt_space = NULL;
1152                 return ret;
1153         }
1154
1155         page_count = obj->size / PAGE_SIZE;
1156         /* Create an AGP memory structure pointing at our pages, and bind it
1157          * into the GTT.
1158          */
1159         obj_priv->agp_mem = drm_agp_bind_pages(dev,
1160                                                obj_priv->page_list,
1161                                                page_count,
1162                                                obj_priv->gtt_offset);
1163         if (obj_priv->agp_mem == NULL) {
1164                 i915_gem_object_free_page_list(obj);
1165                 drm_mm_put_block(obj_priv->gtt_space);
1166                 obj_priv->gtt_space = NULL;
1167                 return -ENOMEM;
1168         }
1169         atomic_inc(&dev->gtt_count);
1170         atomic_add(obj->size, &dev->gtt_memory);
1171
1172         /* Assert that the object is not currently in any GPU domain. As it
1173          * wasn't in the GTT, there shouldn't be any way it could have been in
1174          * a GPU cache
1175          */
1176         BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1177         BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
1178
1179         return 0;
1180 }
1181
1182 void
1183 i915_gem_clflush_object(struct drm_gem_object *obj)
1184 {
1185         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1186
1187         /* If we don't have a page list set up, then we're not pinned
1188          * to GPU, and we can ignore the cache flush because it'll happen
1189          * again at bind time.
1190          */
1191         if (obj_priv->page_list == NULL)
1192                 return;
1193
1194         drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
1195 }
1196
1197 /*
1198  * Set the next domain for the specified object. This
1199  * may not actually perform the necessary flushing/invalidating though,
1200  * as that may want to be batched with other set_domain operations
1201  *
1202  * This is (we hope) the only really tricky part of gem. The goal
1203  * is fairly simple -- track which caches hold bits of the object
1204  * and make sure they remain coherent. A few concrete examples may
1205  * help to explain how it works. For shorthand, we use the notation
1206  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
1207  * a pair of read and write domain masks.
1208  *
1209  * Case 1: the batch buffer
1210  *
1211  *      1. Allocated
1212  *      2. Written by CPU
1213  *      3. Mapped to GTT
1214  *      4. Read by GPU
1215  *      5. Unmapped from GTT
1216  *      6. Freed
1217  *
1218  *      Let's take these a step at a time
1219  *
1220  *      1. Allocated
1221  *              Pages allocated from the kernel may still have
1222  *              cache contents, so we set them to (CPU, CPU) always.
1223  *      2. Written by CPU (using pwrite)
1224  *              The pwrite function calls set_domain (CPU, CPU) and
1225  *              this function does nothing (as nothing changes)
1226  *      3. Mapped by GTT
1227  *              This function asserts that the object is not
1228  *              currently in any GPU-based read or write domains
1229  *      4. Read by GPU
1230  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
1231  *              As write_domain is zero, this function adds in the
1232  *              current read domains (CPU+COMMAND, 0).
1233  *              flush_domains is set to CPU.
1234  *              invalidate_domains is set to COMMAND
1235  *              clflush is run to get data out of the CPU caches
1236  *              then i915_dev_set_domain calls i915_gem_flush to
1237  *              emit an MI_FLUSH and drm_agp_chipset_flush
1238  *      5. Unmapped from GTT
1239  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
1240  *              flush_domains and invalidate_domains end up both zero
1241  *              so no flushing/invalidating happens
1242  *      6. Freed
1243  *              yay, done
1244  *
1245  * Case 2: The shared render buffer
1246  *
1247  *      1. Allocated
1248  *      2. Mapped to GTT
1249  *      3. Read/written by GPU
1250  *      4. set_domain to (CPU,CPU)
1251  *      5. Read/written by CPU
1252  *      6. Read/written by GPU
1253  *
1254  *      1. Allocated
1255  *              Same as last example, (CPU, CPU)
1256  *      2. Mapped to GTT
1257  *              Nothing changes (assertions find that it is not in the GPU)
1258  *      3. Read/written by GPU
1259  *              execbuffer calls set_domain (RENDER, RENDER)
1260  *              flush_domains gets CPU
1261  *              invalidate_domains gets GPU
1262  *              clflush (obj)
1263  *              MI_FLUSH and drm_agp_chipset_flush
1264  *      4. set_domain (CPU, CPU)
1265  *              flush_domains gets GPU
1266  *              invalidate_domains gets CPU
1267  *              wait_rendering (obj) to make sure all drawing is complete.
1268  *              This will include an MI_FLUSH to get the data from GPU
1269  *              to memory
1270  *              clflush (obj) to invalidate the CPU cache
1271  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
1272  *      5. Read/written by CPU
1273  *              cache lines are loaded and dirtied
1274  *      6. Read/written by GPU
1275  *              Same as last GPU access
1276  *
1277  * Case 3: The constant buffer
1278  *
1279  *      1. Allocated
1280  *      2. Written by CPU
1281  *      3. Read by GPU
1282  *      4. Updated (written) by CPU again
1283  *      5. Read by GPU
1284  *
1285  *      1. Allocated
1286  *              (CPU, CPU)
1287  *      2. Written by CPU
1288  *              (CPU, CPU)
1289  *      3. Read by GPU
1290  *              (CPU+RENDER, 0)
1291  *              flush_domains = CPU
1292  *              invalidate_domains = RENDER
1293  *              clflush (obj)
1294  *              MI_FLUSH
1295  *              drm_agp_chipset_flush
1296  *      4. Updated (written) by CPU again
1297  *              (CPU, CPU)
1298  *              flush_domains = 0 (no previous write domain)
1299  *              invalidate_domains = 0 (no new read domains)
1300  *      5. Read by GPU
1301  *              (CPU+RENDER, 0)
1302  *              flush_domains = CPU
1303  *              invalidate_domains = RENDER
1304  *              clflush (obj)
1305  *              MI_FLUSH
1306  *              drm_agp_chipset_flush
1307  */
1308 static int
1309 i915_gem_object_set_domain(struct drm_gem_object *obj,
1310                             uint32_t read_domains,
1311                             uint32_t write_domain)
1312 {
1313         struct drm_device               *dev = obj->dev;
1314         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
1315         uint32_t                        invalidate_domains = 0;
1316         uint32_t                        flush_domains = 0;
1317         int                             ret;
1318
1319 #if WATCH_BUF
1320         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
1321                  __func__, obj,
1322                  obj->read_domains, read_domains,
1323                  obj->write_domain, write_domain);
1324 #endif
1325         /*
1326          * If the object isn't moving to a new write domain,
1327          * let the object stay in multiple read domains
1328          */
1329         if (write_domain == 0)
1330                 read_domains |= obj->read_domains;
1331         else
1332                 obj_priv->dirty = 1;
1333
1334         /*
1335          * Flush the current write domain if
1336          * the new read domains don't match. Invalidate
1337          * any read domains which differ from the old
1338          * write domain
1339          */
1340         if (obj->write_domain && obj->write_domain != read_domains) {
1341                 flush_domains |= obj->write_domain;
1342                 invalidate_domains |= read_domains & ~obj->write_domain;
1343         }
1344         /*
1345          * Invalidate any read caches which may have
1346          * stale data. That is, any new read domains.
1347          */
1348         invalidate_domains |= read_domains & ~obj->read_domains;
1349         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
1350 #if WATCH_BUF
1351                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
1352                          __func__, flush_domains, invalidate_domains);
1353 #endif
1354                 /*
1355                  * If we're invaliding the CPU cache and flushing a GPU cache,
1356                  * then pause for rendering so that the GPU caches will be
1357                  * flushed before the cpu cache is invalidated
1358                  */
1359                 if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
1360                     (flush_domains & ~(I915_GEM_DOMAIN_CPU |
1361                                        I915_GEM_DOMAIN_GTT))) {
1362                         ret = i915_gem_object_wait_rendering(obj);
1363                         if (ret)
1364                                 return ret;
1365                 }
1366                 i915_gem_clflush_object(obj);
1367         }
1368
1369         if ((write_domain | flush_domains) != 0)
1370                 obj->write_domain = write_domain;
1371
1372         /* If we're invalidating the CPU domain, clear the per-page CPU
1373          * domain list as well.
1374          */
1375         if (obj_priv->page_cpu_valid != NULL &&
1376             (write_domain != 0 ||
1377              read_domains & I915_GEM_DOMAIN_CPU)) {
1378                 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
1379                          DRM_MEM_DRIVER);
1380                 obj_priv->page_cpu_valid = NULL;
1381         }
1382         obj->read_domains = read_domains;
1383
1384         dev->invalidate_domains |= invalidate_domains;
1385         dev->flush_domains |= flush_domains;
1386 #if WATCH_BUF
1387         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
1388                  __func__,
1389                  obj->read_domains, obj->write_domain,
1390                  dev->invalidate_domains, dev->flush_domains);
1391 #endif
1392         return 0;
1393 }
1394
1395 /**
1396  * Set the read/write domain on a range of the object.
1397  *
1398  * Currently only implemented for CPU reads, otherwise drops to normal
1399  * i915_gem_object_set_domain().
1400  */
1401 static int
1402 i915_gem_object_set_domain_range(struct drm_gem_object *obj,
1403                                  uint64_t offset,
1404                                  uint64_t size,
1405                                  uint32_t read_domains,
1406                                  uint32_t write_domain)
1407 {
1408         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1409         int ret, i;
1410
1411         if (obj->read_domains & I915_GEM_DOMAIN_CPU)
1412                 return 0;
1413
1414         if (read_domains != I915_GEM_DOMAIN_CPU ||
1415             write_domain != 0)
1416                 return i915_gem_object_set_domain(obj,
1417                                                   read_domains, write_domain);
1418
1419         /* Wait on any GPU rendering to the object to be flushed. */
1420         if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
1421                 ret = i915_gem_object_wait_rendering(obj);
1422                 if (ret)
1423                         return ret;
1424         }
1425
1426         if (obj_priv->page_cpu_valid == NULL) {
1427                 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
1428                                                       DRM_MEM_DRIVER);
1429         }
1430
1431         /* Flush the cache on any pages that are still invalid from the CPU's
1432          * perspective.
1433          */
1434         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
1435                 if (obj_priv->page_cpu_valid[i])
1436                         continue;
1437
1438                 drm_clflush_pages(obj_priv->page_list + i, 1);
1439
1440                 obj_priv->page_cpu_valid[i] = 1;
1441         }
1442
1443         return 0;
1444 }
1445
1446 /**
1447  * Once all of the objects have been set in the proper domain,
1448  * perform the necessary flush and invalidate operations.
1449  *
1450  * Returns the write domains flushed, for use in flush tracking.
1451  */
1452 static uint32_t
1453 i915_gem_dev_set_domain(struct drm_device *dev)
1454 {
1455         uint32_t flush_domains = dev->flush_domains;
1456
1457         /*
1458          * Now that all the buffers are synced to the proper domains,
1459          * flush and invalidate the collected domains
1460          */
1461         if (dev->invalidate_domains | dev->flush_domains) {
1462 #if WATCH_EXEC
1463                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
1464                           __func__,
1465                          dev->invalidate_domains,
1466                          dev->flush_domains);
1467 #endif
1468                 i915_gem_flush(dev,
1469                                dev->invalidate_domains,
1470                                dev->flush_domains);
1471                 dev->invalidate_domains = 0;
1472                 dev->flush_domains = 0;
1473         }
1474
1475         return flush_domains;
1476 }
1477
1478 /**
1479  * Pin an object to the GTT and evaluate the relocations landing in it.
1480  */
1481 static int
1482 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1483                                  struct drm_file *file_priv,
1484                                  struct drm_i915_gem_exec_object *entry)
1485 {
1486         struct drm_device *dev = obj->dev;
1487         struct drm_i915_gem_relocation_entry reloc;
1488         struct drm_i915_gem_relocation_entry __user *relocs;
1489         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1490         int i, ret;
1491         uint32_t last_reloc_offset = -1;
1492         void __iomem *reloc_page = NULL;
1493
1494         /* Choose the GTT offset for our buffer and put it there. */
1495         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
1496         if (ret)
1497                 return ret;
1498
1499         entry->offset = obj_priv->gtt_offset;
1500
1501         relocs = (struct drm_i915_gem_relocation_entry __user *)
1502                  (uintptr_t) entry->relocs_ptr;
1503         /* Apply the relocations, using the GTT aperture to avoid cache
1504          * flushing requirements.
1505          */
1506         for (i = 0; i < entry->relocation_count; i++) {
1507                 struct drm_gem_object *target_obj;
1508                 struct drm_i915_gem_object *target_obj_priv;
1509                 uint32_t reloc_val, reloc_offset;
1510                 uint32_t __iomem *reloc_entry;
1511
1512                 ret = copy_from_user(&reloc, relocs + i, sizeof(reloc));
1513                 if (ret != 0) {
1514                         i915_gem_object_unpin(obj);
1515                         return ret;
1516                 }
1517
1518                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
1519                                                    reloc.target_handle);
1520                 if (target_obj == NULL) {
1521                         i915_gem_object_unpin(obj);
1522                         return -EBADF;
1523                 }
1524                 target_obj_priv = target_obj->driver_private;
1525
1526                 /* The target buffer should have appeared before us in the
1527                  * exec_object list, so it should have a GTT space bound by now.
1528                  */
1529                 if (target_obj_priv->gtt_space == NULL) {
1530                         DRM_ERROR("No GTT space found for object %d\n",
1531                                   reloc.target_handle);
1532                         drm_gem_object_unreference(target_obj);
1533                         i915_gem_object_unpin(obj);
1534                         return -EINVAL;
1535                 }
1536
1537                 if (reloc.offset > obj->size - 4) {
1538                         DRM_ERROR("Relocation beyond object bounds: "
1539                                   "obj %p target %d offset %d size %d.\n",
1540                                   obj, reloc.target_handle,
1541                                   (int) reloc.offset, (int) obj->size);
1542                         drm_gem_object_unreference(target_obj);
1543                         i915_gem_object_unpin(obj);
1544                         return -EINVAL;
1545                 }
1546                 if (reloc.offset & 3) {
1547                         DRM_ERROR("Relocation not 4-byte aligned: "
1548                                   "obj %p target %d offset %d.\n",
1549                                   obj, reloc.target_handle,
1550                                   (int) reloc.offset);
1551                         drm_gem_object_unreference(target_obj);
1552                         i915_gem_object_unpin(obj);
1553                         return -EINVAL;
1554                 }
1555
1556                 if (reloc.write_domain && target_obj->pending_write_domain &&
1557                     reloc.write_domain != target_obj->pending_write_domain) {
1558                         DRM_ERROR("Write domain conflict: "
1559                                   "obj %p target %d offset %d "
1560                                   "new %08x old %08x\n",
1561                                   obj, reloc.target_handle,
1562                                   (int) reloc.offset,
1563                                   reloc.write_domain,
1564                                   target_obj->pending_write_domain);
1565                         drm_gem_object_unreference(target_obj);
1566                         i915_gem_object_unpin(obj);
1567                         return -EINVAL;
1568                 }
1569
1570 #if WATCH_RELOC
1571                 DRM_INFO("%s: obj %p offset %08x target %d "
1572                          "read %08x write %08x gtt %08x "
1573                          "presumed %08x delta %08x\n",
1574                          __func__,
1575                          obj,
1576                          (int) reloc.offset,
1577                          (int) reloc.target_handle,
1578                          (int) reloc.read_domains,
1579                          (int) reloc.write_domain,
1580                          (int) target_obj_priv->gtt_offset,
1581                          (int) reloc.presumed_offset,
1582                          reloc.delta);
1583 #endif
1584
1585                 target_obj->pending_read_domains |= reloc.read_domains;
1586                 target_obj->pending_write_domain |= reloc.write_domain;
1587
1588                 /* If the relocation already has the right value in it, no
1589                  * more work needs to be done.
1590                  */
1591                 if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
1592                         drm_gem_object_unreference(target_obj);
1593                         continue;
1594                 }
1595
1596                 /* Now that we're going to actually write some data in,
1597                  * make sure that any rendering using this buffer's contents
1598                  * is completed.
1599                  */
1600                 i915_gem_object_wait_rendering(obj);
1601
1602                 /* As we're writing through the gtt, flush
1603                  * any CPU writes before we write the relocations
1604                  */
1605                 if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
1606                         i915_gem_clflush_object(obj);
1607                         drm_agp_chipset_flush(dev);
1608                         obj->write_domain = 0;
1609                 }
1610
1611                 /* Map the page containing the relocation we're going to
1612                  * perform.
1613                  */
1614                 reloc_offset = obj_priv->gtt_offset + reloc.offset;
1615                 if (reloc_page == NULL ||
1616                     (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
1617                     (reloc_offset & ~(PAGE_SIZE - 1))) {
1618                         if (reloc_page != NULL)
1619                                 iounmap(reloc_page);
1620
1621                         reloc_page = ioremap_wc(dev->agp->base +
1622                                                 (reloc_offset &
1623                                                  ~(PAGE_SIZE - 1)),
1624                                                 PAGE_SIZE);
1625                         last_reloc_offset = reloc_offset;
1626                         if (reloc_page == NULL) {
1627                                 drm_gem_object_unreference(target_obj);
1628                                 i915_gem_object_unpin(obj);
1629                                 return -ENOMEM;
1630                         }
1631                 }
1632
1633                 reloc_entry = (uint32_t __iomem *)(reloc_page +
1634                                            (reloc_offset & (PAGE_SIZE - 1)));
1635                 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
1636
1637 #if WATCH_BUF
1638                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
1639                           obj, (unsigned int) reloc.offset,
1640                           readl(reloc_entry), reloc_val);
1641 #endif
1642                 writel(reloc_val, reloc_entry);
1643
1644                 /* Write the updated presumed offset for this entry back out
1645                  * to the user.
1646                  */
1647                 reloc.presumed_offset = target_obj_priv->gtt_offset;
1648                 ret = copy_to_user(relocs + i, &reloc, sizeof(reloc));
1649                 if (ret != 0) {
1650                         drm_gem_object_unreference(target_obj);
1651                         i915_gem_object_unpin(obj);
1652                         return ret;
1653                 }
1654
1655                 drm_gem_object_unreference(target_obj);
1656         }
1657
1658         if (reloc_page != NULL)
1659                 iounmap(reloc_page);
1660
1661 #if WATCH_BUF
1662         if (0)
1663                 i915_gem_dump_object(obj, 128, __func__, ~0);
1664 #endif
1665         return 0;
1666 }
1667
1668 /** Dispatch a batchbuffer to the ring
1669  */
1670 static int
1671 i915_dispatch_gem_execbuffer(struct drm_device *dev,
1672                               struct drm_i915_gem_execbuffer *exec,
1673                               uint64_t exec_offset)
1674 {
1675         drm_i915_private_t *dev_priv = dev->dev_private;
1676         struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *)
1677                                              (uintptr_t) exec->cliprects_ptr;
1678         int nbox = exec->num_cliprects;
1679         int i = 0, count;
1680         uint32_t        exec_start, exec_len;
1681         RING_LOCALS;
1682
1683         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
1684         exec_len = (uint32_t) exec->batch_len;
1685
1686         if ((exec_start | exec_len) & 0x7) {
1687                 DRM_ERROR("alignment\n");
1688                 return -EINVAL;
1689         }
1690
1691         if (!exec_start)
1692                 return -EINVAL;
1693
1694         count = nbox ? nbox : 1;
1695
1696         for (i = 0; i < count; i++) {
1697                 if (i < nbox) {
1698                         int ret = i915_emit_box(dev, boxes, i,
1699                                                 exec->DR1, exec->DR4);
1700                         if (ret)
1701                                 return ret;
1702                 }
1703
1704                 if (IS_I830(dev) || IS_845G(dev)) {
1705                         BEGIN_LP_RING(4);
1706                         OUT_RING(MI_BATCH_BUFFER);
1707                         OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1708                         OUT_RING(exec_start + exec_len - 4);
1709                         OUT_RING(0);
1710                         ADVANCE_LP_RING();
1711                 } else {
1712                         BEGIN_LP_RING(2);
1713                         if (IS_I965G(dev)) {
1714                                 OUT_RING(MI_BATCH_BUFFER_START |
1715                                          (2 << 6) |
1716                                          MI_BATCH_NON_SECURE_I965);
1717                                 OUT_RING(exec_start);
1718                         } else {
1719                                 OUT_RING(MI_BATCH_BUFFER_START |
1720                                          (2 << 6));
1721                                 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
1722                         }
1723                         ADVANCE_LP_RING();
1724                 }
1725         }
1726
1727         /* XXX breadcrumb */
1728         return 0;
1729 }
1730
1731 /* Throttle our rendering by waiting until the ring has completed our requests
1732  * emitted over 20 msec ago.
1733  *
1734  * This should get us reasonable parallelism between CPU and GPU but also
1735  * relatively low latency when blocking on a particular request to finish.
1736  */
1737 static int
1738 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
1739 {
1740         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1741         int ret = 0;
1742         uint32_t seqno;
1743
1744         mutex_lock(&dev->struct_mutex);
1745         seqno = i915_file_priv->mm.last_gem_throttle_seqno;
1746         i915_file_priv->mm.last_gem_throttle_seqno =
1747                 i915_file_priv->mm.last_gem_seqno;
1748         if (seqno)
1749                 ret = i915_wait_request(dev, seqno);
1750         mutex_unlock(&dev->struct_mutex);
1751         return ret;
1752 }
1753
1754 int
1755 i915_gem_execbuffer(struct drm_device *dev, void *data,
1756                     struct drm_file *file_priv)
1757 {
1758         drm_i915_private_t *dev_priv = dev->dev_private;
1759         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
1760         struct drm_i915_gem_execbuffer *args = data;
1761         struct drm_i915_gem_exec_object *exec_list = NULL;
1762         struct drm_gem_object **object_list = NULL;
1763         struct drm_gem_object *batch_obj;
1764         int ret, i, pinned = 0;
1765         uint64_t exec_offset;
1766         uint32_t seqno, flush_domains;
1767
1768 #if WATCH_EXEC
1769         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
1770                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
1771 #endif
1772
1773         if (args->buffer_count < 1) {
1774                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1775                 return -EINVAL;
1776         }
1777         /* Copy in the exec list from userland */
1778         exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count,
1779                                DRM_MEM_DRIVER);
1780         object_list = drm_calloc(sizeof(*object_list), args->buffer_count,
1781                                  DRM_MEM_DRIVER);
1782         if (exec_list == NULL || object_list == NULL) {
1783                 DRM_ERROR("Failed to allocate exec or object list "
1784                           "for %d buffers\n",
1785                           args->buffer_count);
1786                 ret = -ENOMEM;
1787                 goto pre_mutex_err;
1788         }
1789         ret = copy_from_user(exec_list,
1790                              (struct drm_i915_relocation_entry __user *)
1791                              (uintptr_t) args->buffers_ptr,
1792                              sizeof(*exec_list) * args->buffer_count);
1793         if (ret != 0) {
1794                 DRM_ERROR("copy %d exec entries failed %d\n",
1795                           args->buffer_count, ret);
1796                 goto pre_mutex_err;
1797         }
1798
1799         mutex_lock(&dev->struct_mutex);
1800
1801         i915_verify_inactive(dev, __FILE__, __LINE__);
1802
1803         if (dev_priv->mm.wedged) {
1804                 DRM_ERROR("Execbuf while wedged\n");
1805                 mutex_unlock(&dev->struct_mutex);
1806                 return -EIO;
1807         }
1808
1809         if (dev_priv->mm.suspended) {
1810                 DRM_ERROR("Execbuf while VT-switched.\n");
1811                 mutex_unlock(&dev->struct_mutex);
1812                 return -EBUSY;
1813         }
1814
1815         /* Zero the gloabl flush/invalidate flags. These
1816          * will be modified as each object is bound to the
1817          * gtt
1818          */
1819         dev->invalidate_domains = 0;
1820         dev->flush_domains = 0;
1821
1822         /* Look up object handles and perform the relocations */
1823         for (i = 0; i < args->buffer_count; i++) {
1824                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
1825                                                        exec_list[i].handle);
1826                 if (object_list[i] == NULL) {
1827                         DRM_ERROR("Invalid object handle %d at index %d\n",
1828                                    exec_list[i].handle, i);
1829                         ret = -EBADF;
1830                         goto err;
1831                 }
1832
1833                 object_list[i]->pending_read_domains = 0;
1834                 object_list[i]->pending_write_domain = 0;
1835                 ret = i915_gem_object_pin_and_relocate(object_list[i],
1836                                                        file_priv,
1837                                                        &exec_list[i]);
1838                 if (ret) {
1839                         DRM_ERROR("object bind and relocate failed %d\n", ret);
1840                         goto err;
1841                 }
1842                 pinned = i + 1;
1843         }
1844
1845         /* Set the pending read domains for the batch buffer to COMMAND */
1846         batch_obj = object_list[args->buffer_count-1];
1847         batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1848         batch_obj->pending_write_domain = 0;
1849
1850         i915_verify_inactive(dev, __FILE__, __LINE__);
1851
1852         for (i = 0; i < args->buffer_count; i++) {
1853                 struct drm_gem_object *obj = object_list[i];
1854                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1855
1856                 if (obj_priv->gtt_space == NULL) {
1857                         /* We evicted the buffer in the process of validating
1858                          * our set of buffers in.  We could try to recover by
1859                          * kicking them everything out and trying again from
1860                          * the start.
1861                          */
1862                         ret = -ENOMEM;
1863                         goto err;
1864                 }
1865
1866                 /* make sure all previous memory operations have passed */
1867                 ret = i915_gem_object_set_domain(obj,
1868                                                  obj->pending_read_domains,
1869                                                  obj->pending_write_domain);
1870                 if (ret)
1871                         goto err;
1872         }
1873
1874         i915_verify_inactive(dev, __FILE__, __LINE__);
1875
1876         /* Flush/invalidate caches and chipset buffer */
1877         flush_domains = i915_gem_dev_set_domain(dev);
1878
1879         i915_verify_inactive(dev, __FILE__, __LINE__);
1880
1881 #if WATCH_COHERENCY
1882         for (i = 0; i < args->buffer_count; i++) {
1883                 i915_gem_object_check_coherency(object_list[i],
1884                                                 exec_list[i].handle);
1885         }
1886 #endif
1887
1888         exec_offset = exec_list[args->buffer_count - 1].offset;
1889
1890 #if WATCH_EXEC
1891         i915_gem_dump_object(object_list[args->buffer_count - 1],
1892                               args->batch_len,
1893                               __func__,
1894                               ~0);
1895 #endif
1896
1897         (void)i915_add_request(dev, flush_domains);
1898
1899         /* Exec the batchbuffer */
1900         ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset);
1901         if (ret) {
1902                 DRM_ERROR("dispatch failed %d\n", ret);
1903                 goto err;
1904         }
1905
1906         /*
1907          * Ensure that the commands in the batch buffer are
1908          * finished before the interrupt fires
1909          */
1910         flush_domains = i915_retire_commands(dev);
1911
1912         i915_verify_inactive(dev, __FILE__, __LINE__);
1913
1914         /*
1915          * Get a seqno representing the execution of the current buffer,
1916          * which we can wait on.  We would like to mitigate these interrupts,
1917          * likely by only creating seqnos occasionally (so that we have
1918          * *some* interrupts representing completion of buffers that we can
1919          * wait on when trying to clear up gtt space).
1920          */
1921         seqno = i915_add_request(dev, flush_domains);
1922         BUG_ON(seqno == 0);
1923         i915_file_priv->mm.last_gem_seqno = seqno;
1924         for (i = 0; i < args->buffer_count; i++) {
1925                 struct drm_gem_object *obj = object_list[i];
1926                 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1927
1928                 i915_gem_object_move_to_active(obj);
1929                 obj_priv->last_rendering_seqno = seqno;
1930 #if WATCH_LRU
1931                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
1932 #endif
1933         }
1934 #if WATCH_LRU
1935         i915_dump_lru(dev, __func__);
1936 #endif
1937
1938         i915_verify_inactive(dev, __FILE__, __LINE__);
1939
1940         /* Copy the new buffer offsets back to the user's exec list. */
1941         ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1942                            (uintptr_t) args->buffers_ptr,
1943                            exec_list,
1944                            sizeof(*exec_list) * args->buffer_count);
1945         if (ret)
1946                 DRM_ERROR("failed to copy %d exec entries "
1947                           "back to user (%d)\n",
1948                            args->buffer_count, ret);
1949 err:
1950         if (object_list != NULL) {
1951                 for (i = 0; i < pinned; i++)
1952                         i915_gem_object_unpin(object_list[i]);
1953
1954                 for (i = 0; i < args->buffer_count; i++)
1955                         drm_gem_object_unreference(object_list[i]);
1956         }
1957         mutex_unlock(&dev->struct_mutex);
1958
1959 pre_mutex_err:
1960         drm_free(object_list, sizeof(*object_list) * args->buffer_count,
1961                  DRM_MEM_DRIVER);
1962         drm_free(exec_list, sizeof(*exec_list) * args->buffer_count,
1963                  DRM_MEM_DRIVER);
1964
1965         return ret;
1966 }
1967
1968 int
1969 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
1970 {
1971         struct drm_device *dev = obj->dev;
1972         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1973         int ret;
1974
1975         i915_verify_inactive(dev, __FILE__, __LINE__);
1976         if (obj_priv->gtt_space == NULL) {
1977                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
1978                 if (ret != 0) {
1979                         DRM_ERROR("Failure to bind: %d", ret);
1980                         return ret;
1981                 }
1982         }
1983         obj_priv->pin_count++;
1984
1985         /* If the object is not active and not pending a flush,
1986          * remove it from the inactive list
1987          */
1988         if (obj_priv->pin_count == 1) {
1989                 atomic_inc(&dev->pin_count);
1990                 atomic_add(obj->size, &dev->pin_memory);
1991                 if (!obj_priv->active &&
1992                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
1993                                            I915_GEM_DOMAIN_GTT)) == 0 &&
1994                     !list_empty(&obj_priv->list))
1995                         list_del_init(&obj_priv->list);
1996         }
1997         i915_verify_inactive(dev, __FILE__, __LINE__);
1998
1999         return 0;
2000 }
2001
2002 void
2003 i915_gem_object_unpin(struct drm_gem_object *obj)
2004 {
2005         struct drm_device *dev = obj->dev;
2006         drm_i915_private_t *dev_priv = dev->dev_private;
2007         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2008
2009         i915_verify_inactive(dev, __FILE__, __LINE__);
2010         obj_priv->pin_count--;
2011         BUG_ON(obj_priv->pin_count < 0);
2012         BUG_ON(obj_priv->gtt_space == NULL);
2013
2014         /* If the object is no longer pinned, and is
2015          * neither active nor being flushed, then stick it on
2016          * the inactive list
2017          */
2018         if (obj_priv->pin_count == 0) {
2019                 if (!obj_priv->active &&
2020                     (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
2021                                            I915_GEM_DOMAIN_GTT)) == 0)
2022                         list_move_tail(&obj_priv->list,
2023                                        &dev_priv->mm.inactive_list);
2024                 atomic_dec(&dev->pin_count);
2025                 atomic_sub(obj->size, &dev->pin_memory);
2026         }
2027         i915_verify_inactive(dev, __FILE__, __LINE__);
2028 }
2029
2030 int
2031 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
2032                    struct drm_file *file_priv)
2033 {
2034         struct drm_i915_gem_pin *args = data;
2035         struct drm_gem_object *obj;
2036         struct drm_i915_gem_object *obj_priv;
2037         int ret;
2038
2039         mutex_lock(&dev->struct_mutex);
2040
2041         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2042         if (obj == NULL) {
2043                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
2044                           args->handle);
2045                 mutex_unlock(&dev->struct_mutex);
2046                 return -EBADF;
2047         }
2048         obj_priv = obj->driver_private;
2049
2050         ret = i915_gem_object_pin(obj, args->alignment);
2051         if (ret != 0) {
2052                 drm_gem_object_unreference(obj);
2053                 mutex_unlock(&dev->struct_mutex);
2054                 return ret;
2055         }
2056
2057         /* XXX - flush the CPU caches for pinned objects
2058          * as the X server doesn't manage domains yet
2059          */
2060         if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
2061                 i915_gem_clflush_object(obj);
2062                 drm_agp_chipset_flush(dev);
2063                 obj->write_domain = 0;
2064         }
2065         args->offset = obj_priv->gtt_offset;
2066         drm_gem_object_unreference(obj);
2067         mutex_unlock(&dev->struct_mutex);
2068
2069         return 0;
2070 }
2071
2072 int
2073 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
2074                      struct drm_file *file_priv)
2075 {
2076         struct drm_i915_gem_pin *args = data;
2077         struct drm_gem_object *obj;
2078
2079         mutex_lock(&dev->struct_mutex);
2080
2081         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2082         if (obj == NULL) {
2083                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
2084                           args->handle);
2085                 mutex_unlock(&dev->struct_mutex);
2086                 return -EBADF;
2087         }
2088
2089         i915_gem_object_unpin(obj);
2090
2091         drm_gem_object_unreference(obj);
2092         mutex_unlock(&dev->struct_mutex);
2093         return 0;
2094 }
2095
2096 int
2097 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
2098                     struct drm_file *file_priv)
2099 {
2100         struct drm_i915_gem_busy *args = data;
2101         struct drm_gem_object *obj;
2102         struct drm_i915_gem_object *obj_priv;
2103
2104         mutex_lock(&dev->struct_mutex);
2105         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
2106         if (obj == NULL) {
2107                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
2108                           args->handle);
2109                 mutex_unlock(&dev->struct_mutex);
2110                 return -EBADF;
2111         }
2112
2113         obj_priv = obj->driver_private;
2114         args->busy = obj_priv->active;
2115
2116         drm_gem_object_unreference(obj);
2117         mutex_unlock(&dev->struct_mutex);
2118         return 0;
2119 }
2120
/**
 * Throttle ioctl: thin wrapper around i915_gem_ring_throttle().
 *
 * @dev: DRM device
 * @data: ioctl argument, unused
 * @file_priv: DRM file whose requests are throttled
 *
 * Returns whatever i915_gem_ring_throttle() returns.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret;

	ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
2127
2128 int i915_gem_init_object(struct drm_gem_object *obj)
2129 {
2130         struct drm_i915_gem_object *obj_priv;
2131
2132         obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
2133         if (obj_priv == NULL)
2134                 return -ENOMEM;
2135
2136         /*
2137          * We've just allocated pages from the kernel,
2138          * so they've just been written by the CPU with
2139          * zeros. They'll need to be clflushed before we
2140          * use them with the GPU.
2141          */
2142         obj->write_domain = I915_GEM_DOMAIN_CPU;
2143         obj->read_domains = I915_GEM_DOMAIN_CPU;
2144
2145         obj->driver_private = obj_priv;
2146         obj_priv->obj = obj;
2147         INIT_LIST_HEAD(&obj_priv->list);
2148         return 0;
2149 }
2150
2151 void i915_gem_free_object(struct drm_gem_object *obj)
2152 {
2153         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2154
2155         while (obj_priv->pin_count > 0)
2156                 i915_gem_object_unpin(obj);
2157
2158         i915_gem_object_unbind(obj);
2159
2160         drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
2161         drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
2162 }
2163
2164 static int
2165 i915_gem_set_domain(struct drm_gem_object *obj,
2166                     struct drm_file *file_priv,
2167                     uint32_t read_domains,
2168                     uint32_t write_domain)
2169 {
2170         struct drm_device *dev = obj->dev;
2171         int ret;
2172         uint32_t flush_domains;
2173
2174         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2175
2176         ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
2177         if (ret)
2178                 return ret;
2179         flush_domains = i915_gem_dev_set_domain(obj->dev);
2180
2181         if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
2182                 (void) i915_add_request(dev, flush_domains);
2183
2184         return 0;
2185 }
2186
2187 /** Unbinds all objects that are on the given buffer list. */
2188 static int
2189 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
2190 {
2191         struct drm_gem_object *obj;
2192         struct drm_i915_gem_object *obj_priv;
2193         int ret;
2194
2195         while (!list_empty(head)) {
2196                 obj_priv = list_first_entry(head,
2197                                             struct drm_i915_gem_object,
2198                                             list);
2199                 obj = obj_priv->obj;
2200
2201                 if (obj_priv->pin_count != 0) {
2202                         DRM_ERROR("Pinned object in unbind list\n");
2203                         mutex_unlock(&dev->struct_mutex);
2204                         return -EINVAL;
2205                 }
2206
2207                 ret = i915_gem_object_unbind(obj);
2208                 if (ret != 0) {
2209                         DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
2210                                   ret);
2211                         mutex_unlock(&dev->struct_mutex);
2212                         return ret;
2213                 }
2214         }
2215
2216
2217         return 0;
2218 }
2219
2220 static int
2221 i915_gem_idle(struct drm_device *dev)
2222 {
2223         drm_i915_private_t *dev_priv = dev->dev_private;
2224         uint32_t seqno, cur_seqno, last_seqno;
2225         int stuck, ret;
2226
2227         if (dev_priv->mm.suspended)
2228                 return 0;
2229
2230         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
2231          * We need to replace this with a semaphore, or something.
2232          */
2233         dev_priv->mm.suspended = 1;
2234
2235         i915_kernel_lost_context(dev);
2236
2237         /* Flush the GPU along with all non-CPU write domains
2238          */
2239         i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
2240                        ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
2241         seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU |
2242                                         I915_GEM_DOMAIN_GTT));
2243
2244         if (seqno == 0) {
2245                 mutex_unlock(&dev->struct_mutex);
2246                 return -ENOMEM;
2247         }
2248
2249         dev_priv->mm.waiting_gem_seqno = seqno;
2250         last_seqno = 0;
2251         stuck = 0;
2252         for (;;) {
2253                 cur_seqno = i915_get_gem_seqno(dev);
2254                 if (i915_seqno_passed(cur_seqno, seqno))
2255                         break;
2256                 if (last_seqno == cur_seqno) {
2257                         if (stuck++ > 100) {
2258                                 DRM_ERROR("hardware wedged\n");
2259                                 dev_priv->mm.wedged = 1;
2260                                 DRM_WAKEUP(&dev_priv->irq_queue);
2261                                 break;
2262                         }
2263                 }
2264                 msleep(10);
2265                 last_seqno = cur_seqno;
2266         }
2267         dev_priv->mm.waiting_gem_seqno = 0;
2268
2269         i915_gem_retire_requests(dev);
2270
2271         /* Active and flushing should now be empty as we've
2272          * waited for a sequence higher than any pending execbuffer
2273          */
2274         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2275         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2276
2277         /* Request should now be empty as we've also waited
2278          * for the last request in the list
2279          */
2280         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2281
2282         /* Move all buffers out of the GTT. */
2283         ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
2284         if (ret)
2285                 return ret;
2286
2287         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2288         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2289         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2290         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2291         return 0;
2292 }
2293
2294 static int
2295 i915_gem_init_hws(struct drm_device *dev)
2296 {
2297         drm_i915_private_t *dev_priv = dev->dev_private;
2298         struct drm_gem_object *obj;
2299         struct drm_i915_gem_object *obj_priv;
2300         int ret;
2301
2302         /* If we need a physical address for the status page, it's already
2303          * initialized at driver load time.
2304          */
2305         if (!I915_NEED_GFX_HWS(dev))
2306                 return 0;
2307
2308         obj = drm_gem_object_alloc(dev, 4096);
2309         if (obj == NULL) {
2310                 DRM_ERROR("Failed to allocate status page\n");
2311                 return -ENOMEM;
2312         }
2313         obj_priv = obj->driver_private;
2314
2315         ret = i915_gem_object_pin(obj, 4096);
2316         if (ret != 0) {
2317                 drm_gem_object_unreference(obj);
2318                 return ret;
2319         }
2320
2321         dev_priv->status_gfx_addr = obj_priv->gtt_offset;
2322         dev_priv->hws_map.offset = dev->agp->base + obj_priv->gtt_offset;
2323         dev_priv->hws_map.size = 4096;
2324         dev_priv->hws_map.type = 0;
2325         dev_priv->hws_map.flags = 0;
2326         dev_priv->hws_map.mtrr = 0;
2327
2328         /* Ioremapping here is the wrong thing to do.  We want cached access.
2329          */
2330         drm_core_ioremap_wc(&dev_priv->hws_map, dev);
2331         if (dev_priv->hws_map.handle == NULL) {
2332                 DRM_ERROR("Failed to map status page.\n");
2333                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2334                 drm_gem_object_unreference(obj);
2335                 return -EINVAL;
2336         }
2337         dev_priv->hws_obj = obj;
2338         dev_priv->hw_status_page = dev_priv->hws_map.handle;
2339         memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
2340         I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
2341         DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
2342
2343         return 0;
2344 }
2345
2346 static int
2347 i915_gem_init_ringbuffer(struct drm_device *dev)
2348 {
2349         drm_i915_private_t *dev_priv = dev->dev_private;
2350         struct drm_gem_object *obj;
2351         struct drm_i915_gem_object *obj_priv;
2352         int ret;
2353         u32 head;
2354
2355         ret = i915_gem_init_hws(dev);
2356         if (ret != 0)
2357                 return ret;
2358
2359         obj = drm_gem_object_alloc(dev, 128 * 1024);
2360         if (obj == NULL) {
2361                 DRM_ERROR("Failed to allocate ringbuffer\n");
2362                 return -ENOMEM;
2363         }
2364         obj_priv = obj->driver_private;
2365
2366         ret = i915_gem_object_pin(obj, 4096);
2367         if (ret != 0) {
2368                 drm_gem_object_unreference(obj);
2369                 return ret;
2370         }
2371
2372         /* Set up the kernel mapping for the ring. */
2373         dev_priv->ring.Size = obj->size;
2374         dev_priv->ring.tail_mask = obj->size - 1;
2375
2376         dev_priv->ring.map.offset = dev->agp->base + obj_priv->gtt_offset;
2377         dev_priv->ring.map.size = obj->size;
2378         dev_priv->ring.map.type = 0;
2379         dev_priv->ring.map.flags = 0;
2380         dev_priv->ring.map.mtrr = 0;
2381
2382         drm_core_ioremap_wc(&dev_priv->ring.map, dev);
2383         if (dev_priv->ring.map.handle == NULL) {
2384                 DRM_ERROR("Failed to map ringbuffer.\n");
2385                 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2386                 drm_gem_object_unreference(obj);
2387                 return -EINVAL;
2388         }
2389         dev_priv->ring.ring_obj = obj;
2390         dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
2391
2392         /* Stop the ring if it's running. */
2393         I915_WRITE(PRB0_CTL, 0);
2394         I915_WRITE(PRB0_TAIL, 0);
2395         I915_WRITE(PRB0_HEAD, 0);
2396
2397         /* Initialize the ring. */
2398         I915_WRITE(PRB0_START, obj_priv->gtt_offset);
2399         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2400
2401         /* G45 ring initialization fails to reset head to zero */
2402         if (head != 0) {
2403                 DRM_ERROR("Ring head not reset to zero "
2404                           "ctl %08x head %08x tail %08x start %08x\n",
2405                           I915_READ(PRB0_CTL),
2406                           I915_READ(PRB0_HEAD),
2407                           I915_READ(PRB0_TAIL),
2408                           I915_READ(PRB0_START));
2409                 I915_WRITE(PRB0_HEAD, 0);
2410
2411                 DRM_ERROR("Ring head forced to zero "
2412                           "ctl %08x head %08x tail %08x start %08x\n",
2413                           I915_READ(PRB0_CTL),
2414                           I915_READ(PRB0_HEAD),
2415                           I915_READ(PRB0_TAIL),
2416                           I915_READ(PRB0_START));
2417         }
2418
2419         I915_WRITE(PRB0_CTL,
2420                    ((obj->size - 4096) & RING_NR_PAGES) |
2421                    RING_NO_REPORT |
2422                    RING_VALID);
2423
2424         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
2425
2426         /* If the head is still not zero, the ring is dead */
2427         if (head != 0) {
2428                 DRM_ERROR("Ring initialization failed "
2429                           "ctl %08x head %08x tail %08x start %08x\n",
2430                           I915_READ(PRB0_CTL),
2431                           I915_READ(PRB0_HEAD),
2432                           I915_READ(PRB0_TAIL),
2433                           I915_READ(PRB0_START));
2434                 return -EIO;
2435         }
2436
2437         /* Update our cache of the ring state */
2438         i915_kernel_lost_context(dev);
2439
2440         return 0;
2441 }
2442
2443 static void
2444 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
2445 {
2446         drm_i915_private_t *dev_priv = dev->dev_private;
2447
2448         if (dev_priv->ring.ring_obj == NULL)
2449                 return;
2450
2451         drm_core_ioremapfree(&dev_priv->ring.map, dev);
2452
2453         i915_gem_object_unpin(dev_priv->ring.ring_obj);
2454         drm_gem_object_unreference(dev_priv->ring.ring_obj);
2455         dev_priv->ring.ring_obj = NULL;
2456         memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
2457
2458         if (dev_priv->hws_obj != NULL) {
2459                 i915_gem_object_unpin(dev_priv->hws_obj);
2460                 drm_gem_object_unreference(dev_priv->hws_obj);
2461                 dev_priv->hws_obj = NULL;
2462                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
2463
2464                 /* Write high address into HWS_PGA when disabling. */
2465                 I915_WRITE(HWS_PGA, 0x1ffff000);
2466         }
2467 }
2468
2469 int
2470 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
2471                        struct drm_file *file_priv)
2472 {
2473         drm_i915_private_t *dev_priv = dev->dev_private;
2474         int ret;
2475
2476         if (dev_priv->mm.wedged) {
2477                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
2478                 dev_priv->mm.wedged = 0;
2479         }
2480
2481         ret = i915_gem_init_ringbuffer(dev);
2482         if (ret != 0)
2483                 return ret;
2484
2485         mutex_lock(&dev->struct_mutex);
2486         BUG_ON(!list_empty(&dev_priv->mm.active_list));
2487         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2488         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
2489         BUG_ON(!list_empty(&dev_priv->mm.request_list));
2490         dev_priv->mm.suspended = 0;
2491         mutex_unlock(&dev->struct_mutex);
2492
2493         drm_irq_install(dev);
2494
2495         return 0;
2496 }
2497
2498 int
2499 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
2500                        struct drm_file *file_priv)
2501 {
2502         int ret;
2503
2504         mutex_lock(&dev->struct_mutex);
2505         ret = i915_gem_idle(dev);
2506         if (ret == 0)
2507                 i915_gem_cleanup_ringbuffer(dev);
2508         mutex_unlock(&dev->struct_mutex);
2509
2510         drm_irq_uninstall(dev);
2511
2512         return 0;
2513 }
2514
2515 void
2516 i915_gem_lastclose(struct drm_device *dev)
2517 {
2518         int ret;
2519         drm_i915_private_t *dev_priv = dev->dev_private;
2520
2521         mutex_lock(&dev->struct_mutex);
2522
2523         if (dev_priv->ring.ring_obj != NULL) {
2524                 ret = i915_gem_idle(dev);
2525                 if (ret)
2526                         DRM_ERROR("failed to idle hardware: %d\n", ret);
2527
2528                 i915_gem_cleanup_ringbuffer(dev);
2529         }
2530
2531         mutex_unlock(&dev->struct_mutex);
2532 }
2533
2534 void
2535 i915_gem_load(struct drm_device *dev)
2536 {
2537         drm_i915_private_t *dev_priv = dev->dev_private;
2538
2539         INIT_LIST_HEAD(&dev_priv->mm.active_list);
2540         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
2541         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
2542         INIT_LIST_HEAD(&dev_priv->mm.request_list);
2543         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
2544                           i915_gem_retire_work_handler);
2545         INIT_WORK(&dev_priv->mm.vblank_work,
2546                   i915_gem_vblank_work_handler);
2547         dev_priv->mm.next_gem_seqno = 1;
2548
2549         i915_gem_detect_bit_6_swizzle(dev);
2550 }