Merge branch 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 27 May 2010 17:48:46 +0000 (10:48 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 27 May 2010 17:48:46 +0000 (10:48 -0700)
* 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6:
  [S390] fill out file list in s390 MAINTAINERS entry
  [S390] Add support for LZO-compressed kernels.
  [S390] cmm: get rid of CMM_PROC config option
  [S390] cmm: remove superfluous EXPORT_SYMBOLs plus cleanups
  [S390] dasd: unit check handling during internal cio I/O
  [S390] cio: unit check handling during internal I/O
  [S390] ccwgroup: add locking around drvdata access
  [S390] cio: remove stsch
  [S390] spp: remove KVM_AWARE_CMF config option
  [S390] kprobes: forbid probing of stnsm/stosm/epsw
  [S390] spp: fix compilation for CONFIG_32BIT
  [S390] atomic: implement atomic64_dec_if_positive
  [S390] cmm: fix crash on module unload

389 files changed:
Documentation/ABI/testing/sysfs-firmware-sfi [new file with mode: 0644]
Documentation/DMA-API-HOWTO.txt
Documentation/SubmittingDrivers
Documentation/cgroups/cgroups.txt
Documentation/cgroups/memory.txt
Documentation/feature-removal-schedule.txt
Documentation/filesystems/Locking
Documentation/vm/numa
MAINTAINERS
arch/alpha/Kconfig
arch/alpha/include/asm/scatterlist.h
arch/arm/configs/s3c2410_defconfig
arch/arm/configs/s3c6400_defconfig
arch/arm/configs/s5p6440_defconfig
arch/arm/configs/s5p6442_defconfig
arch/arm/configs/s5pc100_defconfig
arch/arm/configs/s5pc110_defconfig
arch/arm/configs/s5pv210_defconfig
arch/arm/include/asm/scatterlist.h
arch/arm/mach-davinci/include/mach/mmc.h
arch/arm/mach-omap2/board-3430sdp.c
arch/arm/mach-omap2/board-ldp.c
arch/arm/mach-omap2/board-omap3evm.c
arch/arm/mach-omap2/board-omap3pandora.c
arch/arm/mach-omap2/board-omap3touchbook.c
arch/arm/mach-s5p6440/include/mach/irqs.h
arch/arm/mach-s5p6442/include/mach/irqs.h
arch/arm/mach-s5pc100/include/mach/irqs.h
arch/arm/mach-s5pc100/include/mach/regs-gpio.h
arch/arm/mach-s5pv210/include/mach/irqs.h
arch/arm/mach-s5pv210/include/mach/regs-gpio.h
arch/arm/plat-omap/gpio.c
arch/arm/plat-s5p/Kconfig
arch/arm/plat-s5p/include/plat/irqs.h
arch/arm/plat-s5p/irq-eint.c
arch/avr32/include/asm/scatterlist.h
arch/blackfin/include/asm/scatterlist.h
arch/blackfin/kernel/ptrace.c
arch/cris/include/asm/scatterlist.h
arch/frv/include/asm/scatterlist.h
arch/frv/kernel/ptrace.c
arch/frv/kernel/sysctl.c
arch/h8300/include/asm/scatterlist.h
arch/ia64/Kconfig
arch/ia64/include/asm/scatterlist.h
arch/ia64/include/asm/topology.h
arch/ia64/kernel/pci-swiotlb.c
arch/ia64/kernel/ptrace.c
arch/ia64/kernel/smpboot.c
arch/m32r/include/asm/scatterlist.h
arch/m68k/Kconfig
arch/m68k/amiga/config.c
arch/m68k/amiga/platform.c
arch/m68k/include/asm/amigayle.h
arch/m68k/include/asm/atomic.h
arch/m68k/include/asm/cache.h
arch/m68k/include/asm/scatterlist.h
arch/microblaze/include/asm/scatterlist.h
arch/mips/include/asm/scatterlist.h
arch/mn10300/include/asm/scatterlist.h
arch/parisc/Kconfig
arch/parisc/include/asm/scatterlist.h
arch/powerpc/Kconfig
arch/powerpc/include/asm/scatterlist.h
arch/powerpc/kernel/dma-swiotlb.c
arch/powerpc/kernel/dma.c
arch/powerpc/sysdev/fsl_rio.c
arch/s390/include/asm/scatterlist.h
arch/s390/kernel/smp.c
arch/score/include/asm/scatterlist.h
arch/sh/Kconfig
arch/sh/kernel/ptrace_32.c
arch/sparc/Kconfig
arch/sparc/include/asm/scatterlist.h
arch/x86/Kconfig
arch/x86/include/asm/scatterlist.h
arch/x86/include/asm/topology.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpuid.c
arch/x86/kernel/msr.c
arch/x86/kernel/pci-swiotlb.c
arch/x86/kernel/setup_percpu.c
arch/x86/mm/numa.c
arch/x86/mm/numa_64.c
arch/xtensa/include/asm/scatterlist.h
drivers/base/topology.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/agp/amd64-agp.c
drivers/char/amiserial.c
drivers/char/applicom.c
drivers/char/ipmi/ipmi_msghandler.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ppdev.c
drivers/char/ramoops.c [new file with mode: 0644]
drivers/char/vt.c
drivers/edac/i5000_edac.c
drivers/edac/i5400_edac.c
drivers/edac/i82443bxgx_edac.c
drivers/firewire/core-card.c
drivers/firewire/core-cdev.c
drivers/firewire/core-transaction.c
drivers/firewire/core.h
drivers/firewire/ohci.c
drivers/firewire/ohci.h
drivers/gpio/Kconfig
drivers/gpio/cs5535-gpio.c
drivers/gpio/gpiolib.c
drivers/gpio/it8761e_gpio.c
drivers/gpio/langwell_gpio.c
drivers/gpio/max732x.c
drivers/gpio/pca953x.c
drivers/gpio/pl061.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/nouveau/nouveau_connector.c
drivers/gpu/drm/nouveau/nv40_graph.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_agp.c
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/hid/hid-debug.c
drivers/ide/gayle.c
drivers/ieee1394/dv1394.c
drivers/ieee1394/raw1394.c
drivers/ieee1394/video1394.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/input/joydev.c
drivers/input/keyboard/amikbd.c
drivers/input/misc/Kconfig
drivers/input/misc/Makefile
drivers/input/misc/max8925_onkey.c [new file with mode: 0644]
drivers/input/misc/twl4030-vibra.c
drivers/input/misc/uinput.c
drivers/input/mouse/amimouse.c
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/s3c2410_ts.c
drivers/input/touchscreen/usbtouchscreen.c
drivers/isdn/mISDN/timerdev.c
drivers/md/raid5.c
drivers/message/i2o/i2o_config.c
drivers/misc/lkdtm.c
drivers/mmc/core/core.c
drivers/mmc/core/sd_ops.c
drivers/mmc/core/sdio_io.c
drivers/mmc/host/Kconfig
drivers/mmc/host/Makefile
drivers/mmc/host/at91_mci.c
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/au1xmmc.c
drivers/mmc/host/bfin_sdh.c
drivers/mmc/host/cb710-mmc.c
drivers/mmc/host/davinci_mmc.c
drivers/mmc/host/imxmmc.c
drivers/mmc/host/mmci.c
drivers/mmc/host/msm_sdcc.c
drivers/mmc/host/mvsdio.c
drivers/mmc/host/mxcmmc.c
drivers/mmc/host/omap.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/pxamci.c
drivers/mmc/host/s3cmci.c
drivers/mmc/host/sdhci-of-core.c
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sdhci-of-hlwd.c
drivers/mmc/host/sdhci-pci.c
drivers/mmc/host/sdhci-pltfm.c
drivers/mmc/host/sdhci-s3c.c
drivers/mmc/host/sdhci-spear.c [new file with mode: 0644]
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mmc/host/sdricoh_cs.c
drivers/mmc/host/sh_mmcif.c [new file with mode: 0644]
drivers/mmc/host/tifm_sd.c
drivers/mmc/host/tmio_mmc.c
drivers/mmc/host/via-sdmmc.c
drivers/mmc/host/wbsd.c
drivers/parport/parport_amiga.c
drivers/rapidio/Kconfig
drivers/rapidio/Makefile
drivers/rapidio/rio-scan.c
drivers/rapidio/rio.c
drivers/rapidio/rio.h
drivers/rapidio/switches/Kconfig [new file with mode: 0644]
drivers/rapidio/switches/Makefile
drivers/rapidio/switches/idtcps.c [new file with mode: 0644]
drivers/rapidio/switches/tsi500.c
drivers/rapidio/switches/tsi568.c [new file with mode: 0644]
drivers/rapidio/switches/tsi57x.c [new file with mode: 0644]
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-ab8500.c [new file with mode: 0644]
drivers/rtc/rtc-m41t80.c
drivers/scsi/a2091.c
drivers/scsi/a2091.h
drivers/scsi/a3000.c
drivers/scsi/a3000.h
drivers/scsi/a4000t.c
drivers/scsi/aacraid/commctrl.c
drivers/scsi/arcmsr/arcmsr.h
drivers/scsi/arcmsr/arcmsr_attr.c
drivers/scsi/arcmsr/arcmsr_hba.c
drivers/scsi/be2iscsi/be_mgmt.c
drivers/scsi/bfa/bfa_core.c
drivers/scsi/gvp11.c
drivers/scsi/gvp11.h
drivers/scsi/ipr.c
drivers/scsi/ipr.h
drivers/scsi/iscsi_tcp.c
drivers/scsi/mvme147.c
drivers/scsi/osst.c
drivers/scsi/scsi_scan.c
drivers/scsi/st.c
drivers/sfi/sfi_acpi.c
drivers/sfi/sfi_core.c
drivers/sfi/sfi_core.h
drivers/staging/go7007/saa7134-go7007.c
drivers/telephony/ixj.c
drivers/video/bf54x-lq043fb.c
drivers/video/bfin-t350mcqb-fb.c
drivers/video/s3fb.c
drivers/video/via/viafbdev.c
fs/affs/namei.c
fs/aio.c
fs/autofs/root.c
fs/autofs4/dev-ioctl.c
fs/btrfs/async-thread.c
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode-item.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/compat.c
fs/direct-io.c
fs/exec.c
fs/ext4/balloc.c
fs/ext4/block_validity.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/file.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/migrate.c
fs/ext4/move_extent.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/ext4/xattr.c
fs/freevxfs/vxfs_lookup.c
fs/isofs/dir.c
fs/jbd2/transaction.c
fs/ncpfs/dir.c
fs/nfs/dir.c
fs/nfs/write.c
fs/proc/array.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/kcore.c
fs/proc/root.c
fs/qnx4/dir.c
fs/quota/dquot.c
fs/read_write.c
fs/reiserfs/dir.c
fs/smbfs/dir.c
fs/udf/dir.c
fs/ufs/super.c
fs/ufs/ufs_fs.h
include/asm-generic/dma-mapping-common.h
include/asm-generic/gpio.h
include/asm-generic/scatterlist.h
include/asm-generic/topology.h
include/asm-generic/vmlinux.lds.h
include/linux/aio.h
include/linux/bitmap.h
include/linux/cgroup.h
include/linux/compat.h
include/linux/cpuset.h
include/linux/cred.h
include/linux/dma-mapping.h
include/linux/firewire.h
include/linux/fs.h
include/linux/gpio.h
include/linux/i2c/max732x.h
include/linux/i2c/pca953x.h
include/linux/init_task.h
include/linux/input.h
include/linux/joystick.h
include/linux/kmod.h
include/linux/memcontrol.h
include/linux/mmc/host.h
include/linux/mmc/sdhci-spear.h [new file with mode: 0644]
include/linux/mmc/sdio_func.h
include/linux/mmc/sh_mmcif.h [new file with mode: 0644]
include/linux/mmzone.h
include/linux/nodemask.h
include/linux/notifier.h
include/linux/page_cgroup.h
include/linux/quotaops.h
include/linux/random.h
include/linux/rio.h
include/linux/rio_drv.h
include/linux/rio_ids.h
include/linux/rio_regs.h
include/linux/sched.h
include/linux/sdhci-pltfm.h [new file with mode: 0644]
include/linux/sem.h
include/linux/sfi.h
include/linux/swap.h
include/linux/swiotlb.h
include/linux/threads.h
include/linux/topology.h
include/linux/uinput.h
include/trace/events/ext4.h
ipc/sem.c
kernel/cgroup.c
kernel/cpu.c
kernel/cpuset.c
kernel/cred.c
kernel/exit.c
kernel/fork.c
kernel/kmod.c
kernel/padata.c
kernel/panic.c
kernel/pid.c
kernel/posix-cpu-timers.c
kernel/profile.c
kernel/ptrace.c
kernel/relay.c
kernel/sched.c
kernel/sched_debug.c
kernel/signal.c
kernel/smp.c
kernel/softirq.c
kernel/sys.c
kernel/timer.c
kernel/workqueue.c
lib/Kconfig.debug
lib/Makefile
lib/bitmap.c
lib/cpu-notifier-error-inject.c [new file with mode: 0644]
lib/idr.c
lib/radix-tree.c
lib/random32.c
lib/swiotlb.c
mm/filemap.c
mm/memcontrol.c
mm/migrate.c
mm/oom_kill.c
mm/page_alloc.c
mm/shmem.c
mm/slab.c
net/iucv/iucv.c
net/sunrpc/xprtsock.c
scripts/gen_initramfs_list.sh
security/keys/internal.h
security/keys/keyctl.c
security/keys/process_keys.c
security/keys/request_key.c
usr/Makefile
usr/initramfs_data.lzo.S [new file with mode: 0644]

diff --git a/Documentation/ABI/testing/sysfs-firmware-sfi b/Documentation/ABI/testing/sysfs-firmware-sfi
new file mode 100644 (file)
index 0000000..4be7d44
--- /dev/null
@@ -0,0 +1,15 @@
+What:          /sys/firmware/sfi/tables/
+Date:          May 2010
+Contact:       Len Brown <lenb@kernel.org>
+Description:
+               SFI defines a number of small static memory tables
+               so the kernel can get platform information from firmware.
+
+               The tables are defined in the latest SFI specification:
+               http://simplefirmware.org/documentation
+
+               While the tables are used by the kernel, user-space
+               can observe them this way:
+
+               # cd /sys/firmware/sfi/tables
+               # cat $TABLENAME > $TABLENAME.bin
index 2e435ad..98ce517 100644 (file)
@@ -639,6 +639,36 @@ is planned to completely remove virt_to_bus() and bus_to_virt() as
 they are entirely deprecated.  Some ports already do not provide these
 as it is impossible to correctly support them.
 
+                       Handling Errors
+
+DMA address space is limited on some architectures and an allocation
+failure can be determined by:
+
+- checking if dma_alloc_coherent returns NULL or dma_map_sg returns 0
+
+- checking the returned dma_addr_t of dma_map_single and dma_map_page
+  by using dma_mapping_error():
+
+       dma_addr_t dma_handle;
+
+       dma_handle = dma_map_single(dev, addr, size, direction);
+       if (dma_mapping_error(dev, dma_handle)) {
+               /*
+                * reduce current DMA mapping usage,
+                * delay and try again later or
+                * reset driver.
+                */
+       }
+
+Networking drivers must call dev_kfree_skb to free the socket buffer
+and return NETDEV_TX_OK if the DMA mapping fails on the transmit hook
+(ndo_start_xmit). This means that the socket buffer is just dropped in
+the failure case.
+
+SCSI drivers must return SCSI_MLQUEUE_HOST_BUSY if the DMA mapping
+fails in the queuecommand hook. This means that the SCSI subsystem
+passes the command to the driver again later.
+
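As a rough sketch of the error-handling rules above — the snet_* names, the
private struct, and the queueing helper are invented for illustration, not
taken from this patch — a network driver's transmit hook might look like:

	#include <linux/netdevice.h>
	#include <linux/dma-mapping.h>
	#include <linux/skbuff.h>

	struct snet_priv {
		struct device *dmadev;	/* device used for DMA mapping */
	};

	/* hand the mapped buffer to the hardware TX ring (not shown) */
	static void snet_queue_tx(struct snet_priv *priv, dma_addr_t map,
				  struct sk_buff *skb);

	static netdev_tx_t snet_start_xmit(struct sk_buff *skb,
					   struct net_device *dev)
	{
		struct snet_priv *priv = netdev_priv(dev);
		dma_addr_t mapping;

		mapping = dma_map_single(priv->dmadev, skb->data,
					 skb->len, DMA_TO_DEVICE);
		if (dma_mapping_error(priv->dmadev, mapping)) {
			/* mapping failed: drop the skb, report NETDEV_TX_OK */
			dev_kfree_skb(skb);
			dev->stats.tx_dropped++;
			return NETDEV_TX_OK;
		}

		snet_queue_tx(priv, mapping, skb);
		return NETDEV_TX_OK;
	}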
                Optimizing Unmap State Space Consumption
 
 On many platforms, dma_unmap_{single,page}() is simply a nop.
@@ -703,42 +733,25 @@ to "Closing".
 
 1) Struct scatterlist requirements.
 
-   Struct scatterlist must contain, at a minimum, the following
-   members:
-
-       struct page *page;
-       unsigned int offset;
-       unsigned int length;
-
-   The base address is specified by a "page+offset" pair.
-
-   Previous versions of struct scatterlist contained a "void *address"
-   field that was sometimes used instead of page+offset.  As of Linux
-   2.5., page+offset is always used, and the "address" field has been
-   deleted.
-
-2) More to come...
-
-                       Handling Errors
-
-DMA address space is limited on some architectures and an allocation
-failure can be determined by:
-
-- checking if dma_alloc_coherent returns NULL or dma_map_sg returns 0
-
-- checking the returned dma_addr_t of dma_map_single and dma_map_page
-  by using dma_mapping_error():
-
-       dma_addr_t dma_handle;
-
-       dma_handle = dma_map_single(dev, addr, size, direction);
-       if (dma_mapping_error(dev, dma_handle)) {
-               /*
-                * reduce current DMA mapping usage,
-                * delay and try again later or
-                * reset driver.
-                */
-       }
+   Don't invent an architecture-specific struct scatterlist; just use
+   <asm-generic/scatterlist.h>. You need to enable
+   CONFIG_NEED_SG_DMA_LENGTH if the architecture supports IOMMUs
+   (including software IOMMU).
+
+2) ARCH_KMALLOC_MINALIGN
+
+   Architectures must ensure that a kmalloc'ed buffer is
+   DMA-safe. Drivers and subsystems depend on it. If an architecture
+   isn't fully DMA-coherent (i.e. hardware doesn't ensure that data in
+   the CPU cache is identical to data in main memory),
+   ARCH_KMALLOC_MINALIGN must be set so that the memory allocator
+   makes sure that a kmalloc'ed buffer doesn't share a cache line with
+   others. See arch/arm/include/asm/cache.h as an example.
+
+   Note that ARCH_KMALLOC_MINALIGN is about DMA memory alignment
+   constraints. You don't need to worry about the architecture data
+   alignment constraints (e.g. the alignment constraints about 64-bit
+   objects).
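For reference, the ARM example cited above reduces to a definition along
these lines in arch/arm/include/asm/cache.h (paraphrased from memory; the
shift value is architecture- and config-specific):

	#define L1_CACHE_SHIFT		5
	#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)

	/*
	 * Memory returned by kmalloc() may be used for DMA, so we must make
	 * sure that all such allocations are cache aligned; otherwise
	 * unrelated code may read parts of the buffer into the cache while
	 * a transfer is in flight, and the CPU will then see stale data.
	 */
	#define ARCH_KMALLOC_MINALIGN	L1_CACHE_BYTES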
 
                           Closing
 
index 99e72a8..4947fd8 100644 (file)
@@ -130,6 +130,8 @@ Linux kernel master tree:
        ftp.??.kernel.org:/pub/linux/kernel/...
        ?? == your country code, such as "us", "uk", "fr", etc.
 
+       http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git
+
 Linux kernel mailing list:
        linux-kernel@vger.kernel.org
        [mail majordomo@vger.kernel.org to subscribe]
@@ -160,3 +162,6 @@ How to NOT write kernel driver by Arjan van de Ven:
 
 Kernel Janitor:
        http://janitor.kernelnewbies.org/
+
+GIT, Fast Version Control System:
+       http://git-scm.com/
index 57444c2..b34823f 100644 (file)
@@ -339,7 +339,7 @@ To mount a cgroup hierarchy with all available subsystems, type:
 The "xxx" is not interpreted by the cgroup code, but will appear in
 /proc/mounts so may be any useful identifying string that you like.
 
-To mount a cgroup hierarchy with just the cpuset and numtasks
+To mount a cgroup hierarchy with just the cpuset and memory
 subsystems, type:
 # mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
 
index 6cab1f2..7781857 100644 (file)
@@ -1,18 +1,15 @@
 Memory Resource Controller
 
 NOTE: The Memory Resource Controller has generically been referred
-to as the memory controller in this document. Do not confuse memory controller
-used here with the memory controller that is used in hardware.
+      to as the memory controller in this document. Do not confuse the memory
+      controller used here with the memory controller that is used in hardware.
 
-Salient features
-
-a. Enable control of Anonymous, Page Cache (mapped and unmapped) and
-   Swap Cache memory pages.
-b. The infrastructure allows easy addition of other types of memory to control
-c. Provides *zero overhead* for non memory controller users
-d. Provides a double LRU: global memory pressure causes reclaim from the
-   global LRU; a cgroup on hitting a limit, reclaims from the per
-   cgroup LRU
+(For editors)
+In this document:
+      When we mention a cgroup (cgroupfs's directory) with the memory controller,
+      we call it "memory cgroup". In git logs and source code, patch titles and
+      function names tend to use "memcg"; in this document, we avoid using it.
 
 Benefits and Purpose of the memory controller
 
@@ -33,6 +30,45 @@ d. A CD/DVD burner could control the amount of memory used by the
 e. There are several other use cases, find one or use the controller just
    for fun (to learn and hack on the VM subsystem).
 
+Current Status: linux-2.6.34-mmotm (development version of April 2010)
+
+Features:
+ - accounting anonymous pages, file caches, swap caches usage and limiting them.
+ - private LRU and reclaim routine. (system's global LRU and private LRU
+   work independently from each other)
+ - optionally, memory+swap usage can be accounted and limited.
+ - hierarchical accounting
+ - soft limit
+ - moving (recharging) a task's charges at task migration is selectable.
+ - usage threshold notifier
+ - oom-killer disable knob and oom-notifier
+ - Root cgroup has no limit controls.
+
+ Kernel memory and Hugepages are not under control yet. We just manage
+ pages on LRU. To add more controls, we have to take care of performance.
+
+Brief summary of control files.
+
+ tasks                          # attach a task (thread) and show list of threads
+ cgroup.procs                   # show list of processes
+ cgroup.event_control           # an interface for event_fd()
+ memory.usage_in_bytes          # show current memory (RSS+Cache) usage.
+ memory.memsw.usage_in_bytes    # show current memory+Swap usage
+ memory.limit_in_bytes          # set/show limit of memory usage
+ memory.memsw.limit_in_bytes    # set/show limit of memory+Swap usage
+ memory.failcnt                 # show the number of times memory usage hit the limit
+ memory.memsw.failcnt           # show the number of times memory+Swap usage hit the limit
+ memory.max_usage_in_bytes      # show max memory usage recorded
+ memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes     # set/show soft limit of memory usage
+ memory.stat                    # show various statistics
+ memory.use_hierarchy           # set/show hierarchical account enabled
+ memory.force_empty             # trigger forced move charge to parent
+ memory.swappiness              # set/show swappiness parameter of vmscan
+                                (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate # set/show controls of moving charges
+ memory.oom_control             # set/show oom controls.
+
 1. History
 
 The memory controller has a long history. A request for comments for the memory
@@ -106,14 +142,14 @@ the necessary data structures and check if the cgroup that is being charged
 is over its limit. If it is then reclaim is invoked on the cgroup.
 More details can be found in the reclaim section of this document.
 If everything goes well, a page meta-data-structure called page_cgroup is
-allocated and associated with the page.  This routine also adds the page to
-the per cgroup LRU.
+updated. page_cgroup has its own LRU on cgroup.
+(*) page_cgroup structure is allocated at boot/memory-hotplug time.
 
 2.2.1 Accounting details
 
 All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
-(some pages which never be reclaimable and will not be on global LRU
- are not accounted. we just accounts pages under usual vm management.)
+Some pages which are never reclaimable and will not be on the global LRU
+are not accounted. We just account pages under usual VM management.
 
 RSS pages are accounted at page_fault unless they've already been accounted
 for earlier. A file page will be accounted for as Page Cache when it's
@@ -121,12 +157,19 @@ inserted into inode (radix-tree). While it's mapped into the page tables of
 processes, duplicate accounting is carefully avoided.
 
 A RSS page is unaccounted when it's fully unmapped. A PageCache page is
-unaccounted when it's removed from radix-tree.
+unaccounted when it's removed from radix-tree. Even if RSS pages are fully
+unmapped (by kswapd), they may exist as SwapCache in the system until they
+are really freed. Such SwapCaches are also accounted.
+A swapped-in page is not accounted until it's mapped.
+
+Note: The kernel does swapin-readahead and reads multiple swap pages at once.
+This means swapped-in pages may contain pages for tasks other than the task
+causing the page fault. So, we avoid accounting at swap-in I/O.
 
 At page migration, accounting information is kept.
 
-Note: we just account pages-on-lru because our purpose is to control amount
-of used pages. not-on-lru pages are tend to be out-of-control from vm view.
+Note: we just account pages-on-LRU because our purpose is to control the amount
+of used pages; not-on-LRU pages tend to be out of control from the VM's view.
 
 2.3 Shared Page Accounting
 
@@ -143,6 +186,7 @@ caller of swapoff rather than the users of shmem.
 
 
 2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP)
+
 Swap Extension allows you to record charge for swap. A swapped-in page is
 charged back to original page allocator if possible.
 
@@ -150,13 +194,20 @@ When swap is accounted, following files are added.
  - memory.memsw.usage_in_bytes.
  - memory.memsw.limit_in_bytes.
 
-usage of mem+swap is limited by memsw.limit_in_bytes.
+memsw means memory+swap. Usage of memory+swap is limited by
+memsw.limit_in_bytes.
 
-* why 'mem+swap' rather than swap.
+Example: Assume a system with 4G of swap. A task which allocates 6G of memory
+(by mistake) under a 2G memory limit will use up all the swap.
+In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
+By using the memsw limit, you can avoid a system OOM which can be caused by
+swap shortage.
+
+* why 'memory+swap' rather than swap.
 The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
 to move account from memory to swap...there is no change in usage of
-mem+swap. In other words, when we want to limit the usage of swap without
-affecting global LRU, mem+swap limit is better than just limiting swap from
+memory+swap. In other words, when we want to limit the usage of swap without
+affecting global LRU, memory+swap limit is better than just limiting swap from
 OS point of view.
 
 * What happens when a cgroup hits memory.memsw.limit_in_bytes
@@ -168,12 +219,12 @@ it by cgroup.
 
 2.5 Reclaim
 
-Each cgroup maintains a per cgroup LRU that consists of an active
-and inactive list. When a cgroup goes over its limit, we first try
+Each cgroup maintains a per cgroup LRU which has the same structure as
+global VM. When a cgroup goes over its limit, we first try
 to reclaim memory from the cgroup so as to make space for the new
 pages that the cgroup has touched. If the reclaim is unsuccessful,
 an OOM routine is invoked to select and kill the bulkiest task in the
-cgroup.
+cgroup. (See 10. OOM Control below.)
 
 The reclaim algorithm has not been modified for cgroups, except that
 pages that are selected for reclaiming come from the per cgroup LRU
@@ -184,13 +235,22 @@ limits on the root cgroup.
 
 Note2: When panic_on_oom is set to "2", the whole system will panic.
 
-2. Locking
+When an OOM event notifier is registered, events will be delivered.
+(See the oom_control section.)
+
+2.6 Locking
 
-The memory controller uses the following hierarchy
+   lock_page_cgroup()/unlock_page_cgroup() should not be called under
+   mapping->tree_lock.
 
-1. zone->lru_lock is used for selecting pages to be isolated
-2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone)
-3. lock_page_cgroup() is used to protect page->page_cgroup
+   The other lock ordering is as follows:
+   PG_locked.
+   mm->page_table_lock
+       zone->lru_lock
+         lock_page_cgroup.
+  In many cases, just lock_page_cgroup() is called.
+  The per-zone-per-cgroup LRU (cgroup's private LRU) is guarded only by
+  zone->lru_lock; it has no lock of its own.
 
 3. User Interface
 
@@ -199,6 +259,7 @@ The memory controller uses the following hierarchy
 a. Enable CONFIG_CGROUPS
 b. Enable CONFIG_RESOURCE_COUNTERS
 c. Enable CONFIG_CGROUP_MEM_RES_CTLR
+d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension)
 
 1. Prepare the cgroups
 # mkdir -p /cgroups
@@ -206,31 +267,28 @@ c. Enable CONFIG_CGROUP_MEM_RES_CTLR
 
 2. Make the new group and move bash into it
 # mkdir /cgroups/0
-# echo $$ >  /cgroups/0/tasks
+# echo $$ > /cgroups/0/tasks
 
-Since now we're in the 0 cgroup,
-We can alter the memory limit:
+Since now we're in the 0 cgroup, we can alter the memory limit:
 # echo 4M > /cgroups/0/memory.limit_in_bytes
 
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
-mega or gigabytes.
+mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
+
 NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
 NOTE: We cannot set limits on the root cgroup any more.
 
 # cat /cgroups/0/memory.limit_in_bytes
 4194304
 
-NOTE: The interface has now changed to display the usage in bytes
-instead of pages
-
 We can check the usage:
 # cat /cgroups/0/memory.usage_in_bytes
 1216512
 
 A successful write to this file does not guarantee a successful set of
-this limit to the value written into the file.  This can be due to a
+this limit to the value written into the file. This can be due to a
 number of factors, such as rounding up to page boundaries or the total
-availability of memory on the system.  The user is required to re-read
+availability of memory on the system. The user is required to re-read
 this file after a write to guarantee the value committed by the kernel.
 
 # echo 1 > memory.limit_in_bytes
@@ -245,15 +303,23 @@ caches, RSS and Active pages/Inactive pages are shown.
 
 4. Testing
 
-Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11].
-Apart from that v6 has been tested with several applications and regular
-daily use. The controller has also been tested on the PPC64, x86_64 and
-UML platforms.
+For testing features and implementation, see memcg_test.txt.
+
+Performance testing is also important. To see the pure memory controller
+overhead, testing on tmpfs will give you good numbers for its small overhead.
+Example: do a kernel make on tmpfs.
+
+Page-fault scalability is also important. When measuring parallel
+page-fault performance, a multi-process test may be better than a
+multi-thread test because the latter adds noise from shared objects/status.
+
+But the above two test extreme situations.
+Trying your usual workload under the memory controller is always helpful.
 
 4.1 Troubleshooting
 
 Sometimes a user might find that the application under a cgroup is
-terminated. There are several causes for this:
+terminated by OOM killer. There are several causes for this:
 
 1. The cgroup limit is too low (just too low to do anything useful)
 2. The user is using anonymous memory and swap is turned off or too low
@@ -261,6 +327,9 @@ terminated. There are several causes for this:
 A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
 some of the pages cached in the cgroup (page cache pages).
 
+To know what happens, disabling the OOM killer (see 10. OOM Control below)
+and watching what happens will be helpful.
+
 4.2 Task migration
 
 When a task migrates from one cgroup to another, its charge is not
@@ -268,16 +337,19 @@ carried forward by default. The pages allocated from the original cgroup still
 remain charged to it, the charge is dropped when the page is freed or
 reclaimed.
 
-Note: You can move charges of a task along with task migration. See 8.
+You can move charges of a task along with task migration.
+See 8. "Move charges at task migration"
 
 4.3 Removing a cgroup
 
 A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
 cgroup might have some charge associated with it, even though all
-tasks have migrated away from it.
-Such charges are freed(at default) or moved to its parent. When moved,
-both of RSS and CACHES are moved to parent.
-If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also.
+tasks have migrated away from it. (because we charge against pages, not
+against tasks.)
+
+Such charges are freed or moved to the parent. When moved, both RSS
+and CACHES are moved to the parent.
+rmdir() may return -EBUSY if freeing/moving fails. See 5.1 also.
 
 Charges recorded in swap information is not updated at removal of cgroup.
 Recorded information is discarded and a cgroup which uses swap (swapcache)
@@ -293,10 +365,10 @@ will be charged as a new owner of it.
 
   # echo 0 > memory.force_empty
 
-  Almost all pages tracked by this memcg will be unmapped and freed. Some of
-  pages cannot be freed because it's locked or in-use. Such pages are moved
-  to parent and this cgroup will be empty. But this may return -EBUSY in
-  some too busy case.
+  Almost all pages tracked by this memory cgroup will be unmapped and freed.
+  Some pages cannot be freed because they are locked or in-use. Such pages are
+  moved to parent and this cgroup will be empty. This may return -EBUSY if
+  VM is too busy to free/move all pages immediately.
 
   A typical use case of this interface is calling it before rmdir().
   Because rmdir() moves all pages to parent, some out-of-use page caches can be
@@ -306,19 +378,41 @@ will be charged as a new owner of it.
 
 memory.stat file includes following statistics
 
+# per-memory cgroup local status
 cache          - # of bytes of page cache memory.
 rss            - # of bytes of anonymous and swap cache memory.
+mapped_file    - # of bytes of mapped file (includes tmpfs/shmem)
 pgpgin         - # of pages paged in (equivalent to # of charging events).
 pgpgout                - # of pages paged out (equivalent to # of uncharging events).
-active_anon    - # of bytes of anonymous and  swap cache memory on active
-                 lru list.
+swap           - # of bytes of swap usage
 inactive_anon  - # of bytes of anonymous memory and swap cache memory on
-                 inactive lru list.
-active_file    - # of bytes of file-backed memory on active lru list.
-inactive_file  - # of bytes of file-backed memory on inactive lru list.
+               LRU list.
+active_anon    - # of bytes of anonymous and swap cache memory on active
+               LRU list.
+inactive_file  - # of bytes of file-backed memory on inactive LRU list.
+active_file    - # of bytes of file-backed memory on active LRU list.
 unevictable    - # of bytes of memory that cannot be reclaimed (mlocked etc).
 
-The following additional stats are dependent on CONFIG_DEBUG_VM.
+# status considering hierarchy (see memory.use_hierarchy settings)
+
+hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
+                       under which the memory cgroup is
+hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
+                       hierarchy under which memory cgroup is.
+
+total_cache            - sum of all children's "cache"
+total_rss              - sum of all children's "rss"
+total_mapped_file      - sum of all children's "mapped_file"
+total_pgpgin           - sum of all children's "pgpgin"
+total_pgpgout          - sum of all children's "pgpgout"
+total_swap             - sum of all children's "swap"
+total_inactive_anon    - sum of all children's "inactive_anon"
+total_active_anon      - sum of all children's "active_anon"
+total_inactive_file    - sum of all children's "inactive_file"
+total_active_file      - sum of all children's "active_file"
+total_unevictable      - sum of all children's "unevictable"
+
+# The following additional stats are dependent on CONFIG_DEBUG_VM.
 
 inactive_ratio         - VM internal parameter. (see mm/page_alloc.c)
 recent_rotated_anon    - VM internal parameter. (see mm/vmscan.c)
@@ -327,24 +421,37 @@ recent_scanned_anon       - VM internal parameter. (see mm/vmscan.c)
 recent_scanned_file    - VM internal parameter. (see mm/vmscan.c)
 
 Memo:
-       recent_rotated means recent frequency of lru rotation.
-       recent_scanned means recent # of scans to lru.
+       recent_rotated means recent frequency of LRU rotation.
+       recent_scanned means recent # of scans to LRU.
        shown for better debugging. Please see the code for the exact meanings.
 
 Note:
        Only anonymous and swap cache memory is listed as part of 'rss' stat.
        This should not be confused with the true 'resident set size' or the
-       amount of physical memory used by the cgroup. Per-cgroup rss
-       accounting is not done yet.
+       amount of physical memory used by the cgroup.
+       'rss + file_mapped' will give you the resident set size of the cgroup.
+       (Note: file and shmem may be shared among other cgroups. In that case,
+        file_mapped is accounted only when the memory cgroup is the owner of
+        the page cache.)
 
 5.3 swappiness
-  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 
-  Following cgroups' swappiness can't be changed.
-  - root cgroup (uses /proc/sys/vm/swappiness).
-  - a cgroup which uses hierarchy and it has child cgroup.
-  - a cgroup which uses hierarchy and not the root of hierarchy.
+Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
 
+The following cgroups' swappiness can't be changed:
+- the root cgroup (uses /proc/sys/vm/swappiness).
+- a cgroup which uses hierarchy and has other cgroup(s) below it.
+- a cgroup which uses hierarchy and is not the root of the hierarchy.
+
+5.4 failcnt
+
+A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
+This failcnt(== failure count) shows the number of times that a usage counter
+hit its limit. When a memory cgroup hits a limit, failcnt increases and
+memory under it will be reclaimed.
+
+You can reset failcnt by writing 0 to the failcnt file.
+# echo 0 > .../memory.failcnt
 
 6. Hierarchy support
 
@@ -363,13 +470,13 @@ hierarchy
 
 In the diagram above, with hierarchical accounting enabled, all memory
 usage of e, is accounted to its ancestors up until the root (i.e, c and root),
-that has memory.use_hierarchy enabled.  If one of the ancestors goes over its
+that has memory.use_hierarchy enabled. If one of the ancestors goes over its
 limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
 children of the ancestor.
 
 6.1 Enabling hierarchical accounting and reclaim
 
-The memory controller by default disables the hierarchy feature. Support
+A memory cgroup by default disables the hierarchy feature. Support
 can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
 
 # echo 1 > memory.use_hierarchy
@@ -379,10 +486,10 @@ The feature can be disabled by
 # echo 0 > memory.use_hierarchy
 
 NOTE1: Enabling/disabling will fail if the cgroup already has other
-cgroups created below it.
+       cgroups created below it.
 
 NOTE2: When panic_on_oom is set to "2", the whole system will panic in
-case of an oom event in any cgroup.
+       case of an OOM event in any cgroup.
 
 7. Soft limits
 
@@ -392,7 +499,7 @@ is to allow control groups to use as much of the memory as needed, provided
 a. There is no memory contention
 b. They do not exceed their hard limit
 
-When the system detects memory contention or low memory control groups
+When the system detects memory contention or low memory, control groups
 are pushed back to their soft limits. If the soft limit of each control
 group is very high, they are pushed back as much as possible to make
 sure that one control group does not starve the others of memory.
@@ -406,7 +513,7 @@ it gets invoked from balance_pgdat (kswapd).
 7.1 Interface
 
 Soft limits can be setup by using the following commands (in this example we
-assume a soft limit of 256 megabytes)
+assume a soft limit of 256 MiB)
 
 # echo 256M > memory.soft_limit_in_bytes
 
@@ -442,7 +549,7 @@ Note: Charges are moved only when you move mm->owner, IOW, a leader of a thread
 Note: If we cannot find enough space for the task in the destination cgroup, we
       try to make space by reclaiming memory. Task migration may fail if we
       cannot make enough space.
-Note: It can take several seconds if you move charges in giga bytes order.
+Note: It can take several seconds if you move a large amount of charges.
 
 And if you want disable it again:
 
@@ -451,21 +558,27 @@ And if you want disable it again:
 8.2 Type of charges which can be move
 
 Each bit of move_charge_at_immigrate has its own meaning about what type of
-charges should be moved.
+charges should be moved. But in any case, it must be noted that the charge of
+a page or a swap can be moved only when it is charged to the task's current
+(old) memory cgroup.
 
   bit | what type of charges would be moved ?
  -----+------------------------------------------------------------------------
    0  | A charge of an anonymous page(or swap of it) used by the target task.
       | Those pages and swaps must be used only by the target task. You must
       | enable Swap Extension(see 2.4) to enable move of swap charges.
-
-Note: Those pages and swaps must be charged to the old cgroup.
-Note: More type of pages(e.g. file cache, shmem,) will be supported by other
-      bits in future.
+ -----+------------------------------------------------------------------------
+   1  | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory)
+      | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
+      | anonymous pages, file pages (and swaps) in the range mmapped by the task
+      | will be moved even if the task hasn't done a page fault, i.e. they might
+      | not be the task's "RSS", but another task's "RSS" that maps the same file.
+      | And the mapcount of the page is ignored (the page can be moved even if
+      | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to
+      | enable move of swap charges.
 
 8.3 TODO
 
-- Add support for other types of pages(e.g. file cache, shmem, etc.).
 - Implement madvise(2) to let users decide the vma to be moved or not to be
   moved.
 - All of moving charge operations are done under cgroup_mutex. It's not good
@@ -473,22 +586,61 @@ Note: More type of pages(e.g. file cache, shmem,) will be supported by other
 
 9. Memory thresholds
 
-Memory controler implements memory thresholds using cgroups notification
+Memory cgroup implements memory thresholds using cgroups notification
 API (see cgroups.txt). It allows registering multiple memory and memsw
 thresholds and delivers notifications when a threshold is crossed.
 
 To register a threshold, an application needs to:
- - create an eventfd using eventfd(2);
- - open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
- - write string like "<event_fd> <memory.usage_in_bytes> <threshold>" to
-   cgroup.event_control.
+- create an eventfd using eventfd(2);
+- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
+- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
+  cgroup.event_control.
 
 Application will be notified through eventfd when memory usage crosses
 threshold in any direction.
 
 It's applicable for root and non-root cgroup.
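A minimal userspace sketch of this registration (the /cgroups/0 mount point
follows the example in section 3; the 4M threshold is arbitrary):

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/eventfd.h>

	int main(void)
	{
		char buf[64];
		uint64_t count;
		int efd = eventfd(0, 0);
		int ufd = open("/cgroups/0/memory.usage_in_bytes", O_RDONLY);
		int cfd = open("/cgroups/0/cgroup.event_control", O_WRONLY);

		if (efd < 0 || ufd < 0 || cfd < 0) {
			perror("setup");
			return 1;
		}

		/* register a 4M threshold: "<event_fd> <fd> <threshold>" */
		snprintf(buf, sizeof(buf), "%d %d 4194304", efd, ufd);
		if (write(cfd, buf, strlen(buf)) < 0) {
			perror("cgroup.event_control");
			return 1;
		}

		/* blocks until usage crosses the threshold, either way */
		if (read(efd, &count, sizeof(count)) == sizeof(count))
			printf("threshold crossed\n");
		return 0;
	}

Registering an OOM notifier (section 10 below) follows the same pattern,
except that memory.oom_control is opened and no threshold field is written.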
 
-10. TODO
+10. OOM Control
+
+memory.oom_control file is for OOM notification and other controls.
+
+Memory cgroup implements an OOM notifier using the cgroup notification
+API (see cgroups.txt). It allows registering multiple OOM notification
+deliveries and sends a notification when an OOM happens.
+
+To register a notifier, an application needs to:
+ - create an eventfd using eventfd(2)
+ - open memory.oom_control file
+ - write string like "<event_fd> <fd of memory.oom_control>" to
+   cgroup.event_control
+
+The application will be notified through the eventfd when an OOM happens.
+OOM notification doesn't work for the root cgroup.
+
+You can disable the OOM-killer by writing "1" to the memory.oom_control file:
+
+       # echo 1 > memory.oom_control
+
+This operation is only allowed for the top cgroup of a sub-hierarchy.
+If the OOM-killer is disabled, tasks under the cgroup will hang/sleep
+in the memory cgroup's OOM-waitqueue when they request accountable memory.
+
+To let them run again, you have to relax the memory cgroup's OOM status by
+       * enlarging the limit or reducing usage.
+To reduce usage,
+       * kill some tasks.
+       * move some tasks to another group with account migration.
+       * remove some files (on tmpfs?)
+
+Then, the stopped tasks will work again.
+
+Reading the file shows the current OOM status.
+       oom_kill_disable 0 or 1 (if 1, oom-killer is disabled)
+       under_oom        0 or 1 (if 1, the memory cgroup is under OOM, tasks may
+                                be stopped.)
+
+11. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
index a86152a..672be01 100644 (file)
@@ -646,3 +646,13 @@ Who:       Thomas Gleixner <tglx@linutronix.de>
 
 ----------------------------
 
+What:  old ieee1394 subsystem (CONFIG_IEEE1394)
+When:  2.6.37
+Files: drivers/ieee1394/ except init_ohci1394_dma.c
+Why:   superseded by drivers/firewire/ (CONFIG_FIREWIRE) which offers more
+       features, better performance, and better security, all with a smaller
+       and more modern code base
+Who:   Stefan Richter <stefanr@s5r6.in-berlin.de>
+
+----------------------------
+
index af16080..61c98f0 100644 (file)
@@ -429,8 +429,9 @@ check_flags:                no
 implementations.  If your fs is not using generic_file_llseek, you
 need to acquire and release the appropriate locks in your ->llseek().
 For many filesystems, it is probably safe to acquire the inode
-mutex.  Note some filesystems (i.e. remote ones) provide no
-protection for i_size so you will need to use the BKL.
+mutex or just to use i_size_read() instead.
+Note: this does not protect file->f_pos against concurrent modifications
+since this is something userspace has to take care of.
 
 Note: ext2_release() was *the* source of contention on fs-intensive
 loads and dropping BKL on ->release() helps to get rid of that (we still
index e93ad94..a200a38 100644 (file)
 Started Nov 1999 by Kanoj Sarcar <kanoj@sgi.com>
 
-The intent of this file is to have an uptodate, running commentary 
-from different people about NUMA specific code in the Linux vm.
-
-What is NUMA? It is an architecture where the memory access times
-for different regions of memory from a given processor varies
-according to the "distance" of the memory region from the processor.
-Each region of memory to which access times are the same from any 
-cpu, is called a node. On such architectures, it is beneficial if
-the kernel tries to minimize inter node communications. Schemes
-for this range from kernel text and read-only data replication
-across nodes, and trying to house all the data structures that
-key components of the kernel need on memory on that node.
-
-Currently, all the numa support is to provide efficient handling
-of widely discontiguous physical memory, so architectures which 
-are not NUMA but can have huge holes in the physical address space
-can use the same code. All this code is bracketed by CONFIG_DISCONTIGMEM.
-
-The initial port includes NUMAizing the bootmem allocator code by
-encapsulating all the pieces of information into a bootmem_data_t
-structure. Node specific calls have been added to the allocator. 
-In theory, any platform which uses the bootmem allocator should 
-be able to put the bootmem and mem_map data structures anywhere
-it deems best.
-
-Each node's page allocation data structures have also been encapsulated
-into a pg_data_t. The bootmem_data_t is just one part of this. To 
-make the code look uniform between NUMA and regular UMA platforms, 
-UMA platforms have a statically allocated pg_data_t too (contig_page_data).
-For the sake of uniformity, the function num_online_nodes() is also defined
-for all platforms. As we run benchmarks, we might decide to NUMAize 
-more variables like low_on_memory, nr_free_pages etc into the pg_data_t.
-
-The NUMA aware page allocation code currently tries to allocate pages 
-from different nodes in a round robin manner.  This will be changed to 
-do concentratic circle search, starting from current node, once the 
-NUMA port achieves more maturity. The call alloc_pages_node has been 
-added, so that drivers can make the call and not worry about whether 
-it is running on a NUMA or UMA platform.
+What is NUMA?
+
+This question can be answered from a couple of perspectives:  the
+hardware view and the Linux software view.
+
+From the hardware perspective, a NUMA system is a computer platform that
+comprises multiple components or assemblies each of which may contain 0
+or more CPUs, local memory, and/or IO buses.  For brevity and to
+disambiguate the hardware view of these physical components/assemblies
+from the software abstraction thereof, we'll call the components/assemblies
+'cells' in this document.
+
+Each of the 'cells' may be viewed as an SMP [symmetric multi-processor] subset
+of the system--although some components necessary for a stand-alone SMP system
+may not be populated on any given cell.   The cells of the NUMA system are
+connected together with some sort of system interconnect--e.g., a crossbar or
+point-to-point link are common types of NUMA system interconnects.  Both of
+these types of interconnects can be aggregated to create NUMA platforms with
+cells at multiple distances from other cells.
+
+For Linux, the NUMA platforms of interest are primarily what is known as Cache
+Coherent NUMA or ccNUMA systems.   With ccNUMA systems, all memory is visible
+to and accessible from any CPU attached to any cell and cache coherency
+is handled in hardware by the processor caches and/or the system interconnect.
+
+Memory access time and effective memory bandwidth varies depending on how far
+away the cell containing the CPU or IO bus making the memory access is from the
+cell containing the target memory.  For example, access to memory by CPUs
+attached to the same cell will experience faster access times and higher
+bandwidths than accesses to memory on other, remote cells.  NUMA platforms
+can have cells at multiple remote distances from any given cell.
+
+Platform vendors don't build NUMA systems just to make software developers'
+lives interesting.  Rather, this architecture is a means to provide scalable
+memory bandwidth.  However, to achieve scalable memory bandwidth, system and
+application software must arrange for a large majority of the memory references
+[cache misses] to be to "local" memory--memory on the same cell, if any--or
+to the closest cell with memory.
+
+This leads to the Linux software view of a NUMA system:
+
+Linux divides the system's hardware resources into multiple software
+abstractions called "nodes".  Linux maps the nodes onto the physical cells
+of the hardware platform, abstracting away some of the details for some
+architectures.  As with physical cells, software nodes may contain 0 or more
+CPUs, memory and/or IO buses.  And, again, memory accesses to memory on
+"closer" nodes--nodes that map to closer cells--will generally experience
+faster access times and higher effective bandwidth than accesses to more
+remote cells.
+
+For some architectures, such as x86, Linux will "hide" any node representing a
+physical cell that has no memory attached, and reassign any CPUs attached to
+that cell to a node representing a cell that does have memory.  Thus, on
+these architectures, one cannot assume that all CPUs that Linux associates with
+a given node will see the same local memory access times and bandwidth.
+
+In addition, for some architectures, again x86 is an example, Linux supports
+the emulation of additional nodes.  For NUMA emulation, linux will carve up
+the existing nodes--or the system memory for non-NUMA platforms--into multiple
+nodes.  Each emulated node will manage a fraction of the underlying cells'
+physical memory.  NUMA emulation is useful for testing NUMA kernel and
+application features on non-NUMA platforms, and as a sort of memory resource
+management mechanism when used together with cpusets.
+[see Documentation/cgroups/cpusets.txt]
+
+For each node with memory, Linux constructs an independent memory management
+subsystem, complete with its own free page lists, in-use page lists, usage
+statistics and locks to mediate access.  In addition, Linux constructs for
+each memory zone [one or more of DMA, DMA32, NORMAL, HIGH_MEMORY, MOVABLE],
+an ordered "zonelist".  A zonelist specifies the zones/nodes to visit when a
+selected zone/node cannot satisfy the allocation request.  This situation,
+when a zone has no available memory to satisfy a request, is called
+"overflow" or "fallback".
+
+Because some nodes contain multiple zones containing different types of
+memory, Linux must decide whether to order the zonelists such that allocations
+fall back to the same zone type on a different node, or to a different zone
+type on the same node.  This is an important consideration because some zones,
+such as DMA or DMA32, represent relatively scarce resources.  Linux chooses
+a default zonelist order based on the sizes of the various zone types relative
+to the total memory of the node and the total memory of the system.  The
+default zonelist order may be overridden using the numa_zonelist_order kernel
+boot parameter or sysctl.  [see Documentation/kernel-parameters.txt and
+Documentation/sysctl/vm.txt]
+
+By default, Linux will attempt to satisfy memory allocation requests from the
+node to which the CPU that executes the request is assigned.  Specifically,
+Linux will attempt to allocate from the first node in the appropriate zonelist
+for the node where the request originates.  This is called "local allocation."
+If the "local" node cannot satisfy the request, the kernel will examine other
+nodes' zones in the selected zonelist looking for the first zone in the list
+that can satisfy the request.
+
+Local allocation will tend to keep subsequent access to the allocated memory
+"local" to the underlying physical resources and off the system interconnect--
+as long as the task on whose behalf the kernel allocated some memory does not
+later migrate away from that memory.  The Linux scheduler is aware of the
+NUMA topology of the platform--embodied in the "scheduling domains" data
+structures [see Documentation/scheduler/sched-domains.txt]--and the scheduler
+attempts to minimize task migration to distant scheduling domains.  However,
+the scheduler does not take a task's NUMA footprint into account directly.
+Thus, under sufficient imbalance, tasks can migrate between nodes, remote
+from their initial node and kernel data structures.
+
+System administrators and application designers can restrict a task's migration
+to improve NUMA locality using various CPU affinity command line interfaces,
+such as taskset(1) and numactl(1), and program interfaces such as
+sched_setaffinity(2).  Further, one can modify the kernel's default local
+allocation behavior using Linux NUMA memory policy.
+[see Documentation/vm/numa_memory_policy.txt]
+
+System administrators can restrict the CPUs and nodes' memories that a non-
+privileged user can specify in the scheduling or NUMA commands and functions
+using control groups and CPUsets.  [see Documentation/cgroups/cpusets.txt]
+
+On architectures that do not hide memoryless nodes, Linux will include only
+zones [nodes] with memory in the zonelists.  This means that for a memoryless
+node the "local memory node"--the node of the first zone in CPU's node's
+zonelist--will not be the node itself.  Rather, it will be the node that the
+kernel selected as the nearest node with memory when it built the zonelists.
+So, default "local" allocations will succeed, with the kernel supplying the
+closest available memory.  This is a consequence of the same mechanism that
+allows such allocations to fallback to other nearby nodes when a node that
+does contain memory overflows.
+
+Some kernel allocations do not want or cannot tolerate this allocation fallback
+behavior.  Rather they want to be sure they get memory from the specified node
+or get notified that the node has no free memory.  This is usually the case when
+a subsystem allocates per CPU memory resources, for example.
+
+A typical model for making such an allocation is to obtain the node id of the
+node to which the "current CPU" is attached using one of the kernel's
+numa_node_id() or cpu_to_node() functions and then request memory from only
+the node id returned.  When such an allocation fails, the requesting subsystem
+may revert to its own fallback path.  The slab kernel memory allocator is an
+example of this.  Or, the subsystem may choose to disable or not to enable
+itself on allocation failure.  The kernel profiling subsystem is an example of
+this.
+
+If the architecture supports--does not hide--memoryless nodes, then CPUs
+attached to memoryless nodes would always incur the fallback path overhead
+or some subsystems would fail to initialize if they attempted to allocate
+memory exclusively from a node without memory.  To support such
+architectures transparently, kernel subsystems can use the numa_mem_id()
+or cpu_to_mem() function to locate the "local memory node" for the calling or
+specified CPU.  Again, this is the same node from which default, local page
+allocations will be attempted.
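A minimal kernel-side sketch of the allocation model just described (the
helper is illustrative; only numa_node_id(), numa_mem_id() and the slab
allocator come from the text above):

	#include <linux/slab.h>
	#include <linux/topology.h>

	/* allocate a buffer from the local node, with an explicit fallback */
	static void *alloc_local_buffer(size_t size)
	{
		void *buf;

		/* "local allocation": require memory on this CPU's node */
		buf = kmalloc_node(size, GFP_KERNEL | __GFP_THISNODE,
				   numa_node_id());
		if (buf)
			return buf;

		/*
		 * Own fallback path: on a memoryless node (where supported),
		 * numa_mem_id() names the nearest node that does have memory.
		 */
		return kmalloc_node(size, GFP_KERNEL, numa_mem_id());
	}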
index cf4db76..33047a6 100644 (file)
@@ -5007,6 +5007,12 @@ L:       linux-mmc@vger.kernel.org
 S:     Maintained
 F:     drivers/mmc/host/sdhci-s3c.c
 
+SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER
+M:     Viresh Kumar <viresh.kumar@st.com>
+L:     linux-mmc@vger.kernel.org
+S:     Maintained
+F:     drivers/mmc/host/sdhci-spear.c
+
 SECURITY SUBSYSTEM
 M:     James Morris <jmorris@namei.org>
 L:     linux-security-module@vger.kernel.org (suggested Cc:)
index 24efdfe..3e2e540 100644 (file)
@@ -61,6 +61,9 @@ config ZONE_DMA
 config NEED_DMA_MAP_STATE
        def_bool y
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config GENERIC_ISA_DMA
        bool
        default y
index 440747c..5728c52 100644 (file)
@@ -1,24 +1,7 @@
 #ifndef _ALPHA_SCATTERLIST_H
 #define _ALPHA_SCATTERLIST_H
 
-#include <asm/page.h>
-#include <asm/types.h>
-  
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       unsigned long page_link;
-       unsigned int offset;
-
-       unsigned int length;
-
-       dma_addr_t dma_address;
-       __u32 dma_length;
-};
-
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->dma_length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (~0UL)
 
index 9236475..43af89c 100644 (file)
@@ -1,12 +1,14 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:17:31 2010
+# Wed May 26 19:04:29 2010
 #
 CONFIG_ARM=y
 CONFIG_HAVE_PWM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -35,6 +37,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -186,9 +189,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -224,7 +229,7 @@ CONFIG_ARCH_S3C2410=y
 # CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_S5P6440 is not set
 # CONFIG_ARCH_S5P6442 is not set
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 # CONFIG_ARCH_S5PV210 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -233,6 +238,7 @@ CONFIG_ARCH_S3C2410=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -243,11 +249,15 @@ CONFIG_S3C_BOOT_ERROR_RESET=y
 CONFIG_S3C_BOOT_UART_FORCE_FIFO=y
 CONFIG_S3C_LOWLEVEL_UART_PORT=0
 CONFIG_SAMSUNG_CLKSRC=y
+CONFIG_S3C_GPIO_CFG_S3C24XX=y
+CONFIG_S3C_GPIO_PULL_UP=y
 CONFIG_SAMSUNG_GPIO_EXTRA=0
 CONFIG_S3C_GPIO_SPACE=0
 CONFIG_S3C_ADC=y
 CONFIG_S3C_DEV_HSMMC=y
+CONFIG_S3C_DEV_HWMON=y
 CONFIG_S3C_DEV_USB_HOST=y
+CONFIG_S3C_DEV_WDT=y
 CONFIG_S3C_DEV_NAND=y
 CONFIG_S3C_DMA=y
 
@@ -260,6 +270,7 @@ CONFIG_PLAT_S3C24XX=y
 CONFIG_CPU_LLSERIAL_S3C2410=y
 CONFIG_CPU_LLSERIAL_S3C2440=y
 CONFIG_S3C2410_CLOCK=y
+CONFIG_S3C2443_CLOCK=y
 CONFIG_S3C24XX_DCLK=y
 CONFIG_S3C24XX_PWM=y
 CONFIG_S3C24XX_GPIO_EXTRA=128
@@ -270,6 +281,7 @@ CONFIG_S3C2410_DMA=y
 # CONFIG_S3C2410_DMA_DEBUG is not set
 CONFIG_MACH_SMDK=y
 CONFIG_S3C24XX_SIMTEC_AUDIO=y
+CONFIG_S3C2410_SETUP_TS=y
 
 #
 # S3C2400 Machines
@@ -289,6 +301,7 @@ CONFIG_ARCH_H1940=y
 # CONFIG_H1940BT is not set
 CONFIG_PM_H1940=y
 CONFIG_MACH_N30=y
+CONFIG_MACH_N35=y
 CONFIG_ARCH_BAST=y
 CONFIG_MACH_OTOM=y
 CONFIG_MACH_AML_M5900=y
@@ -309,6 +322,11 @@ CONFIG_MACH_SMDK2413=y
 CONFIG_MACH_S3C2413=y
 CONFIG_MACH_SMDK2412=y
 CONFIG_MACH_VSTMS=y
+
+#
+# S3C2416 Machines
+#
+# CONFIG_MACH_SMDK2416 is not set
 CONFIG_CPU_S3C2440=y
 CONFIG_CPU_S3C2442=y
 CONFIG_CPU_S3C244X=y
@@ -330,6 +348,7 @@ CONFIG_SMDK2440_CPU2440=y
 CONFIG_SMDK2440_CPU2442=y
 CONFIG_MACH_AT2440EVB=y
 CONFIG_MACH_MINI2440=y
+# CONFIG_MACH_RX1950 is not set
 CONFIG_CPU_S3C2443=y
 CONFIG_S3C2443_DMA=y
 
@@ -410,6 +429,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_CMDLINE="root=/dev/hda1 ro init=/bin/bash console=ttySAC0"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -509,7 +529,9 @@ CONFIG_TCP_CONG_ILLINOIS=m
 # CONFIG_DEFAULT_BIC is not set
 CONFIG_DEFAULT_CUBIC=y
 # CONFIG_DEFAULT_HTCP is not set
+# CONFIG_DEFAULT_HYBLA is not set
 # CONFIG_DEFAULT_VEGAS is not set
+# CONFIG_DEFAULT_VENO is not set
 # CONFIG_DEFAULT_WESTWOOD is not set
 # CONFIG_DEFAULT_RENO is not set
 CONFIG_DEFAULT_TCP_CONG="cubic"
@@ -566,6 +588,16 @@ CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 # CONFIG_NETFILTER_TPROXY is not set
 CONFIG_NETFILTER_XTABLES=m
+
+#
+# Xtables combined modules
+#
+CONFIG_NETFILTER_XT_MARK=m
+CONFIG_NETFILTER_XT_CONNMARK=m
+
+#
+# Xtables targets
+#
 CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
 CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
 # CONFIG_NETFILTER_XT_TARGET_CT is not set
@@ -577,9 +609,14 @@ CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
 # CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set
 CONFIG_NETFILTER_XT_TARGET_RATEEST=m
+# CONFIG_NETFILTER_XT_TARGET_TEE is not set
 # CONFIG_NETFILTER_XT_TARGET_TRACE is not set
 CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set
+
+#
+# Xtables matches
+#
 CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
 CONFIG_NETFILTER_XT_MATCH_COMMENT=m
 CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -598,6 +635,7 @@ CONFIG_NETFILTER_XT_MATCH_LIMIT=m
 CONFIG_NETFILTER_XT_MATCH_MAC=m
 CONFIG_NETFILTER_XT_MATCH_MARK=m
 CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+# CONFIG_NETFILTER_XT_MATCH_OSF is not set
 CONFIG_NETFILTER_XT_MATCH_OWNER=m
 CONFIG_NETFILTER_XT_MATCH_POLICY=m
 CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
@@ -605,7 +643,6 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
-# CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT is not set
 CONFIG_NETFILTER_XT_MATCH_SCTP=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -613,7 +650,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
-# CONFIG_NETFILTER_XT_MATCH_OSF is not set
 CONFIG_IP_VS=m
 # CONFIG_IP_VS_IPV6 is not set
 # CONFIG_IP_VS_DEBUG is not set
@@ -713,6 +749,7 @@ CONFIG_IP6_NF_RAW=m
 # CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
+# CONFIG_L2TP is not set
 # CONFIG_BRIDGE is not set
 # CONFIG_NET_DSA is not set
 # CONFIG_VLAN_8021Q is not set
@@ -739,6 +776,7 @@ CONFIG_NET_CLS_ROUTE=y
 # CONFIG_IRDA is not set
 CONFIG_BT=m
 CONFIG_BT_L2CAP=m
+# CONFIG_BT_L2CAP_EXT_FEATURES is not set
 CONFIG_BT_SCO=m
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
@@ -775,6 +813,7 @@ CONFIG_CFG80211_WEXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
 # CONFIG_LIB80211 is not set
 CONFIG_MAC80211=m
+CONFIG_MAC80211_HAS_RC=y
 CONFIG_MAC80211_RC_MINSTREL=y
 # CONFIG_MAC80211_RC_DEFAULT_PID is not set
 CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y
@@ -785,6 +824,7 @@ CONFIG_MAC80211_LEDS=y
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
+# CONFIG_CAIF is not set
 
 #
 # Device Drivers
@@ -828,6 +868,7 @@ CONFIG_MTD_BLOCK=y
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
 # CONFIG_SSFDC is not set
+# CONFIG_SM_FTL is not set
 # CONFIG_MTD_OOPS is not set
 
 #
@@ -882,9 +923,12 @@ CONFIG_MTD_ROM=y
 # CONFIG_MTD_DOC2001 is not set
 # CONFIG_MTD_DOC2001PLUS is not set
 CONFIG_MTD_NAND=y
-# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+CONFIG_MTD_NAND_ECC=y
 # CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_SM_COMMON is not set
 # CONFIG_MTD_NAND_MUSEUM_IDS is not set
+CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR=0xFF108018
 # CONFIG_MTD_NAND_GPIO is not set
 CONFIG_MTD_NAND_IDS=y
 CONFIG_MTD_NAND_S3C2410=y
@@ -1149,6 +1193,7 @@ CONFIG_KEYBOARD_ATKBD=y
 # CONFIG_QT2160 is not set
 # CONFIG_KEYBOARD_LKKBD is not set
 # CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_KEYBOARD_TCA6416 is not set
 # CONFIG_KEYBOARD_MATRIX is not set
 # CONFIG_KEYBOARD_LM8323 is not set
 # CONFIG_KEYBOARD_MAX7359 is not set
@@ -1212,6 +1257,7 @@ CONFIG_INPUT_TOUCHSCREEN=y
 # CONFIG_TOUCHSCREEN_AD7879_SPI is not set
 # CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_DYNAPRO is not set
+# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
 # CONFIG_TOUCHSCREEN_EETI is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_S3C2410 is not set
@@ -1248,6 +1294,7 @@ CONFIG_TOUCHSCREEN_USB_NEXIO=y
 # CONFIG_TOUCHSCREEN_TSC2007 is not set
 # CONFIG_TOUCHSCREEN_W90X900 is not set
 CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_AD714X is not set
 CONFIG_INPUT_ATI_REMOTE=m
 CONFIG_INPUT_ATI_REMOTE2=m
 CONFIG_INPUT_KEYSPAN_REMOTE=m
@@ -1255,6 +1302,7 @@ CONFIG_INPUT_POWERMATE=m
 CONFIG_INPUT_YEALINK=m
 CONFIG_INPUT_CM109=m
 CONFIG_INPUT_UINPUT=m
+# CONFIG_INPUT_PCF8574 is not set
 CONFIG_INPUT_GPIO_ROTARY_ENCODER=m
 
 #
@@ -1287,6 +1335,7 @@ CONFIG_SERIAL_NONSTANDARD=y
 # CONFIG_MOXA_INTELLIO is not set
 # CONFIG_MOXA_SMARTIO is not set
 # CONFIG_N_HDLC is not set
+# CONFIG_N_GSM is not set
 # CONFIG_RISCOM8 is not set
 # CONFIG_SPECIALIX is not set
 # CONFIG_STALDRV is not set
@@ -1324,6 +1373,8 @@ CONFIG_SERIAL_S3C2440=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -1499,6 +1550,7 @@ CONFIG_SENSORS_LM85=m
 # CONFIG_SENSORS_SMSC47M192 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_ADS7871 is not set
 # CONFIG_SENSORS_AMC6821 is not set
 # CONFIG_SENSORS_THMC50 is not set
 # CONFIG_SENSORS_TMP401 is not set
@@ -1836,10 +1888,12 @@ CONFIG_USB_SERIAL_PL2303=y
 # CONFIG_USB_SERIAL_TI is not set
 # CONFIG_USB_SERIAL_CYBERJACK is not set
 # CONFIG_USB_SERIAL_XIRCOM is not set
+CONFIG_USB_SERIAL_WWAN=m
 CONFIG_USB_SERIAL_OPTION=m
 # CONFIG_USB_SERIAL_OMNINET is not set
 # CONFIG_USB_SERIAL_OPTICON is not set
 # CONFIG_USB_SERIAL_VIVOPAY_SERIAL is not set
+# CONFIG_USB_SERIAL_ZIO is not set
 # CONFIG_USB_SERIAL_DEBUG is not set
 
 #
@@ -1999,10 +2053,6 @@ CONFIG_RTC_DRV_S3C=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -2274,6 +2324,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -2284,6 +2335,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index a3a9993..7d8b4cf 100644
@@ -1,11 +1,13 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:17:32 2010
+# Wed May 26 19:04:30 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -34,6 +36,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -179,9 +182,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -217,7 +222,7 @@ CONFIG_MMU=y
 CONFIG_ARCH_S3C64XX=y
 # CONFIG_ARCH_S5P6440 is not set
 # CONFIG_ARCH_S5P6442 is not set
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 # CONFIG_ARCH_S5PV210 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -226,6 +231,7 @@ CONFIG_ARCH_S3C64XX=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -251,7 +257,10 @@ CONFIG_S3C_DEV_I2C1=y
 CONFIG_S3C_DEV_FB=y
 CONFIG_S3C_DEV_USB_HOST=y
 CONFIG_S3C_DEV_USB_HSOTG=y
+CONFIG_S3C_DEV_WDT=y
 CONFIG_S3C_DEV_NAND=y
+CONFIG_SAMSUNG_DEV_ADC=y
+CONFIG_SAMSUNG_DEV_TS=y
 CONFIG_S3C_DMA=y
 
 #
@@ -260,6 +269,7 @@ CONFIG_S3C_DMA=y
 # CONFIG_SAMSUNG_PM_DEBUG is not set
 # CONFIG_S3C_PM_DEBUG_LED_SMDK is not set
 # CONFIG_SAMSUNG_PM_CHECK is not set
+CONFIG_SAMSUNG_WAKEMASK=y
 CONFIG_PLAT_S3C64XX=y
 CONFIG_CPU_S3C6410=y
 CONFIG_S3C64XX_DMA=y
@@ -277,6 +287,8 @@ CONFIG_SMDK6410_SD_CH0=y
 # CONFIG_SMDK6410_WM1192_EV1 is not set
 # CONFIG_MACH_NCP is not set
 # CONFIG_MACH_HMT is not set
+# CONFIG_MACH_SMARTQ5 is not set
+# CONFIG_MACH_SMARTQ7 is not set
 
 #
 # Processor Type
@@ -302,6 +314,7 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_DCACHE_DISABLE is not set
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_ARM_L1_CACHE_SHIFT=5
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_411920 is not set
 CONFIG_ARM_VIC=y
@@ -352,6 +365,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="console=ttySAC0,115200 root=/dev/ram init=/linuxrc initrd=0x51000000,6M ramdisk_size=6144"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -430,6 +444,7 @@ CONFIG_MTD=y
 # CONFIG_INFTL is not set
 # CONFIG_RFD_FTL is not set
 # CONFIG_SSFDC is not set
+# CONFIG_SM_FTL is not set
 # CONFIG_MTD_OOPS is not set
 
 #
@@ -472,9 +487,12 @@ CONFIG_MTD_CFI_I2=y
 # CONFIG_MTD_DOC2001 is not set
 # CONFIG_MTD_DOC2001PLUS is not set
 CONFIG_MTD_NAND=y
-# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+CONFIG_MTD_NAND_ECC=y
 # CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+# CONFIG_MTD_SM_COMMON is not set
 # CONFIG_MTD_NAND_MUSEUM_IDS is not set
+CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR=0xFF108018
 # CONFIG_MTD_NAND_GPIO is not set
 CONFIG_MTD_NAND_IDS=y
 CONFIG_MTD_NAND_S3C2410=y
@@ -569,6 +587,7 @@ CONFIG_KEYBOARD_ATKBD=y
 # CONFIG_QT2160 is not set
 # CONFIG_KEYBOARD_LKKBD is not set
 # CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_KEYBOARD_TCA6416 is not set
 # CONFIG_KEYBOARD_MATRIX is not set
 # CONFIG_KEYBOARD_MAX7359 is not set
 # CONFIG_KEYBOARD_NEWTON is not set
@@ -638,6 +657,8 @@ CONFIG_SERIAL_S3C6400=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -791,6 +812,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LIS3_I2C is not set
 # CONFIG_THERMAL is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_HAVE_S3C2410_WATCHDOG=y
 CONFIG_SSB_POSSIBLE=y
 
 #
@@ -877,6 +899,7 @@ CONFIG_SND_SOC=m
 CONFIG_SND_SOC_AC97_BUS=y
 CONFIG_SND_S3C24XX_SOC=m
 CONFIG_SND_S3C_SOC_AC97=m
+# CONFIG_SND_S3C64XX_SOC_WM8580 is not set
 CONFIG_SND_SOC_SMDK_WM9713=m
 CONFIG_SND_SOC_I2C_AND_SPI=m
 # CONFIG_SND_SOC_ALL_CODECS is not set
@@ -936,10 +959,6 @@ CONFIG_RTC_LIB=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -1096,6 +1115,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -1106,6 +1126,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index 619bfab..532e987 100644
@@ -1,11 +1,13 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:18:18 2010
+# Wed May 26 19:04:32 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -33,6 +35,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -178,9 +181,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -216,7 +221,7 @@ CONFIG_MMU=y
 # CONFIG_ARCH_S3C64XX is not set
 CONFIG_ARCH_S5P6440=y
 # CONFIG_ARCH_S5P6442 is not set
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 # CONFIG_ARCH_S5PV210 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -225,6 +230,7 @@ CONFIG_ARCH_S5P6440=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -240,10 +246,15 @@ CONFIG_SAMSUNG_GPIOLIB_4BIT=y
 CONFIG_S3C_GPIO_CFG_S3C24XX=y
 CONFIG_S3C_GPIO_CFG_S3C64XX=y
 CONFIG_S3C_GPIO_PULL_UPDOWN=y
+CONFIG_S5P_GPIO_DRVSTR=y
 CONFIG_SAMSUNG_GPIO_EXTRA=0
 CONFIG_S3C_GPIO_SPACE=0
 CONFIG_S3C_GPIO_TRACK=y
 # CONFIG_S3C_ADC is not set
+CONFIG_S3C_DEV_WDT=y
+CONFIG_SAMSUNG_DEV_ADC=y
+CONFIG_SAMSUNG_DEV_TS=y
+CONFIG_S3C_PL330_DMA=y
 
 #
 # Power management
@@ -276,10 +287,12 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_DCACHE_DISABLE is not set
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_ARM_L1_CACHE_SHIFT=5
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_411920 is not set
 CONFIG_ARM_VIC=y
 CONFIG_ARM_VIC_NR=2
+CONFIG_PL330=y
 
 #
 # Bus support
@@ -326,6 +339,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x20800000,8M console=ttySAC1,115200 init=/linuxrc"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -490,7 +504,9 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y
 CONFIG_INPUT_TOUCHSCREEN=y
 # CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_DYNAPRO is not set
+# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_S3C2410 is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
 # CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
@@ -546,6 +562,8 @@ CONFIG_SERIAL_S3C6400=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -593,6 +611,7 @@ CONFIG_GPIOLIB=y
 # CONFIG_HWMON is not set
 # CONFIG_THERMAL is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_HAVE_S3C2410_WATCHDOG=y
 CONFIG_SSB_POSSIBLE=y
 
 #
@@ -649,10 +668,6 @@ CONFIG_RTC_LIB=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -850,6 +865,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -860,6 +876,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index d7ea275..068219b 100644
@@ -1,11 +1,13 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:18:19 2010
+# Wed May 26 19:04:34 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -33,6 +35,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -178,9 +181,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -216,7 +221,7 @@ CONFIG_MMU=y
 # CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_S5P6440 is not set
 CONFIG_ARCH_S5P6442=y
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 # CONFIG_ARCH_S5PV210 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -225,6 +230,7 @@ CONFIG_ARCH_S5P6442=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -240,10 +246,12 @@ CONFIG_SAMSUNG_GPIOLIB_4BIT=y
 CONFIG_S3C_GPIO_CFG_S3C24XX=y
 CONFIG_S3C_GPIO_CFG_S3C64XX=y
 CONFIG_S3C_GPIO_PULL_UPDOWN=y
+CONFIG_S5P_GPIO_DRVSTR=y
 CONFIG_SAMSUNG_GPIO_EXTRA=0
 CONFIG_S3C_GPIO_SPACE=0
 CONFIG_S3C_GPIO_TRACK=y
 # CONFIG_S3C_ADC is not set
+CONFIG_S3C_PL330_DMA=y
 
 #
 # Power management
@@ -276,10 +284,12 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_DCACHE_DISABLE is not set
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_ARM_L1_CACHE_SHIFT=5
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_411920 is not set
 CONFIG_ARM_VIC=y
 CONFIG_ARM_VIC_NR=2
+CONFIG_PL330=y
 
 #
 # Bus support
@@ -326,6 +336,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x20800000,8M console=ttySAC1,115200 init=/linuxrc"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -471,6 +482,7 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_TOUCHSCREEN=y
 # CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_DYNAPRO is not set
+# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -525,6 +537,8 @@ CONFIG_SERIAL_S5PV210=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -624,10 +638,6 @@ CONFIG_RTC_LIB=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -836,6 +846,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -846,6 +857,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index 2053be6..ebc6245 100644
@@ -1,12 +1,14 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.30
-# Wed Jul  1 15:53:07 2009
+# Linux kernel version: 2.6.34
+# Wed May 26 19:04:35 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
-CONFIG_MMU=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
+CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_STACKTRACE_SUPPORT=y
@@ -18,7 +20,9 @@ CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_NEED_DMA_MAP_STATE=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_ARM_L1_CACHE_SHIFT_6=y
 CONFIG_VECTORS_BASE=0xffff0000
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 CONFIG_CONSTRUCTORS=y
@@ -31,6 +35,13 @@ CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_HAVE_KERNEL_LZO=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
+# CONFIG_KERNEL_LZO is not set
 CONFIG_SWAP=y
 # CONFIG_SYSVIPC is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
@@ -38,14 +49,15 @@ CONFIG_SWAP=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_TINY_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=17
-# CONFIG_GROUP_SCHED is not set
 # CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
@@ -59,6 +71,7 @@ CONFIG_INITRAMFS_SOURCE=""
 CONFIG_RD_GZIP=y
 CONFIG_RD_BZIP2=y
 CONFIG_RD_LZMA=y
+CONFIG_RD_LZO=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -80,19 +93,21 @@ CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_AIO=y
+CONFIG_HAVE_PERF_EVENTS=y
+CONFIG_PERF_USE_VMALLOC=y
 
 #
-# Performance Counters
+# Kernel Performance Events And Counters
 #
+# CONFIG_PERF_EVENTS is not set
+# CONFIG_PERF_COUNTERS is not set
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_SLUB_DEBUG=y
-# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_COMPAT_BRK=y
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
 # CONFIG_PROFILING is not set
-# CONFIG_MARKERS is not set
 CONFIG_HAVE_OPROFILE=y
 # CONFIG_KPROBES is not set
 CONFIG_HAVE_KPROBES=y
@@ -122,25 +137,56 @@ CONFIG_LBDAF=y
 # IO Schedulers
 #
 CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
 # CONFIG_DEFAULT_DEADLINE is not set
 CONFIG_DEFAULT_CFQ=y
 # CONFIG_DEFAULT_NOOP is not set
 CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_INLINE_SPIN_TRYLOCK is not set
+# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK is not set
+# CONFIG_INLINE_SPIN_LOCK_BH is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_SPIN_UNLOCK is not set
+# CONFIG_INLINE_SPIN_UNLOCK_BH is not set
+# CONFIG_INLINE_SPIN_UNLOCK_IRQ is not set
+# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_READ_TRYLOCK is not set
+# CONFIG_INLINE_READ_LOCK is not set
+# CONFIG_INLINE_READ_LOCK_BH is not set
+# CONFIG_INLINE_READ_LOCK_IRQ is not set
+# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_READ_UNLOCK is not set
+# CONFIG_INLINE_READ_UNLOCK_BH is not set
+# CONFIG_INLINE_READ_UNLOCK_IRQ is not set
+# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set
+# CONFIG_INLINE_WRITE_TRYLOCK is not set
+# CONFIG_INLINE_WRITE_LOCK is not set
+# CONFIG_INLINE_WRITE_LOCK_BH is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set
+# CONFIG_INLINE_WRITE_UNLOCK is not set
+# CONFIG_INLINE_WRITE_UNLOCK_BH is not set
+# CONFIG_INLINE_WRITE_UNLOCK_IRQ is not set
+# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set
+# CONFIG_MUTEX_SPIN_ON_OWNER is not set
 # CONFIG_FREEZER is not set
 
 #
 # System Type
 #
+CONFIG_MMU=y
 # CONFIG_ARCH_AAEC2000 is not set
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -156,6 +202,7 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 # CONFIG_ARCH_IXP2000 is not set
 # CONFIG_ARCH_IXP4XX is not set
 # CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_DOVE is not set
 # CONFIG_ARCH_KIRKWOOD is not set
 # CONFIG_ARCH_LOKI is not set
 # CONFIG_ARCH_MV78XX0 is not set
@@ -164,39 +211,64 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 # CONFIG_ARCH_KS8695 is not set
 # CONFIG_ARCH_NS9XXX is not set
 # CONFIG_ARCH_W90X900 is not set
+# CONFIG_ARCH_NUC93X is not set
 # CONFIG_ARCH_PNX4008 is not set
 # CONFIG_ARCH_PXA is not set
 # CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_SHMOBILE is not set
 # CONFIG_ARCH_RPC is not set
 # CONFIG_ARCH_SA1100 is not set
 # CONFIG_ARCH_S3C2410 is not set
 # CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_S5P6440 is not set
+# CONFIG_ARCH_S5P6442 is not set
 CONFIG_ARCH_S5PC100=y
+# CONFIG_ARCH_S5PV210 is not set
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
 # CONFIG_ARCH_U300 is not set
+# CONFIG_ARCH_U8500 is not set
+# CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
-CONFIG_PLAT_S3C=y
+# CONFIG_PLAT_SPEAR is not set
+CONFIG_PLAT_SAMSUNG=y
 
 #
 # Boot options
 #
 # CONFIG_S3C_BOOT_ERROR_RESET is not set
 CONFIG_S3C_BOOT_UART_FORCE_FIFO=y
+CONFIG_S3C_LOWLEVEL_UART_PORT=0
+CONFIG_SAMSUNG_CLKSRC=y
+CONFIG_SAMSUNG_IRQ_VIC_TIMER=y
+CONFIG_SAMSUNG_IRQ_UART=y
+CONFIG_SAMSUNG_GPIOLIB_4BIT=y
+CONFIG_S3C_GPIO_CFG_S3C24XX=y
+CONFIG_S3C_GPIO_CFG_S3C64XX=y
+CONFIG_S3C_GPIO_PULL_UPDOWN=y
+CONFIG_S5P_GPIO_DRVSTR=y
+CONFIG_SAMSUNG_GPIO_EXTRA=0
+CONFIG_S3C_GPIO_SPACE=0
+CONFIG_S3C_GPIO_TRACK=y
+# CONFIG_S3C_ADC is not set
+CONFIG_S3C_DEV_HSMMC=y
+CONFIG_S3C_DEV_HSMMC1=y
+CONFIG_S3C_DEV_HSMMC2=y
+CONFIG_S3C_DEV_I2C1=y
+CONFIG_S3C_DEV_FB=y
+CONFIG_S3C_PL330_DMA=y
 
 #
 # Power management
 #
-CONFIG_S3C_LOWLEVEL_UART_PORT=0
-CONFIG_S3C_GPIO_SPACE=0
-CONFIG_S3C_GPIO_TRACK=y
-CONFIG_S3C_GPIO_PULL_UPDOWN=y
-CONFIG_PLAT_S5PC1XX=y
-CONFIG_CPU_S5PC100_INIT=y
-CONFIG_CPU_S5PC100_CLOCK=y
-CONFIG_S5PC100_SETUP_I2C0=y
+CONFIG_PLAT_S5P=y
+CONFIG_S5P_EXT_INT=y
 CONFIG_CPU_S5PC100=y
+CONFIG_S5PC100_SETUP_FB_24BPP=y
+CONFIG_S5PC100_SETUP_I2C1=y
+CONFIG_S5PC100_SETUP_SDHCI=y
+CONFIG_S5PC100_SETUP_SDHCI_GPIO=y
 CONFIG_MACH_SMDKC100=y
 
 #
@@ -206,7 +278,7 @@ CONFIG_CPU_32v6K=y
 CONFIG_CPU_V7=y
 CONFIG_CPU_32v7=y
 CONFIG_CPU_ABRT_EV7=y
-CONFIG_CPU_PABRT_IFAR=y
+CONFIG_CPU_PABRT_V7=y
 CONFIG_CPU_CACHE_V7=y
 CONFIG_CPU_CACHE_VIPT=y
 CONFIG_CPU_COPY_V6=y
@@ -224,11 +296,15 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_DCACHE_DISABLE is not set
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_HAS_TLS_REG=y
+CONFIG_ARM_L1_CACHE_SHIFT=6
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
+CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_430973 is not set
 # CONFIG_ARM_ERRATA_458693 is not set
 # CONFIG_ARM_ERRATA_460075 is not set
 CONFIG_ARM_VIC=y
 CONFIG_ARM_VIC_NR=2
+CONFIG_PL330=y
 
 #
 # Bus support
@@ -244,8 +320,11 @@ CONFIG_VMSPLIT_3G=y
 # CONFIG_VMSPLIT_2G is not set
 # CONFIG_VMSPLIT_1G is not set
 CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_HZ=100
+# CONFIG_THUMB2_KERNEL is not set
 CONFIG_AEABI=y
 CONFIG_OABI_COMPAT=y
 # CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
@@ -258,12 +337,11 @@ CONFIG_FLATMEM_MANUAL=y
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_PAGEFLAGS_EXTENDED=y
-CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_SPLIT_PTLOCK_CPUS=999999
 # CONFIG_PHYS_ADDR_T_64BIT is not set
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_VIRT_TO_BUS=y
-CONFIG_HAVE_MLOCK=y
-CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+# CONFIG_KSM is not set
 CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
 CONFIG_ALIGNMENT_TRAP=y
 # CONFIG_UACCESS_WITH_MEMCPY is not set
@@ -274,6 +352,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="root=/dev/mtdblock2 rootfstype=cramfs init=/linuxrc console=ttySAC2,115200 mem=128M"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -317,6 +396,7 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
 # Generic Driver Options
 #
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_DEVTMPFS is not set
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
@@ -331,6 +411,10 @@ CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
+
+#
+# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
+#
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -338,9 +422,12 @@ CONFIG_BLK_DEV_RAM_SIZE=8192
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_MG_DISK is not set
 CONFIG_MISC_DEVICES=y
+# CONFIG_AD525X_DPOT is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_ISL29003 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_DS1682 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -350,18 +437,21 @@ CONFIG_EEPROM_AT24=y
 # CONFIG_EEPROM_LEGACY is not set
 # CONFIG_EEPROM_MAX6875 is not set
 # CONFIG_EEPROM_93CX6 is not set
+# CONFIG_IWMC3200TOP is not set
 CONFIG_HAVE_IDE=y
 # CONFIG_IDE is not set
 
 #
 # SCSI device support
 #
+CONFIG_SCSI_MOD=y
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_SCSI is not set
 # CONFIG_SCSI_DMA is not set
 # CONFIG_SCSI_NETLINK is not set
 # CONFIG_ATA is not set
 # CONFIG_MD is not set
+# CONFIG_PHONE is not set
 
 #
 # Input device support
@@ -369,6 +459,7 @@ CONFIG_HAVE_IDE=y
 CONFIG_INPUT=y
 # CONFIG_INPUT_FF_MEMLESS is not set
 # CONFIG_INPUT_POLLDEV is not set
+# CONFIG_INPUT_SPARSEKMAP is not set
 
 #
 # Userland interfaces
@@ -385,13 +476,19 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 # Input Device Drivers
 #
 CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ADP5588 is not set
 CONFIG_KEYBOARD_ATKBD=y
-# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_QT2160 is not set
 # CONFIG_KEYBOARD_LKKBD is not set
-# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_KEYBOARD_TCA6416 is not set
+# CONFIG_KEYBOARD_MATRIX is not set
+# CONFIG_KEYBOARD_MAX7359 is not set
 # CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_OPENCORES is not set
 # CONFIG_KEYBOARD_STOWAWAY is not set
-# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
 CONFIG_INPUT_MOUSE=y
 CONFIG_MOUSE_PS2=y
 CONFIG_MOUSE_PS2_ALPS=y
@@ -399,6 +496,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y
 CONFIG_MOUSE_PS2_SYNAPTICS=y
 CONFIG_MOUSE_PS2_TRACKPOINT=y
 # CONFIG_MOUSE_PS2_ELANTECH is not set
+# CONFIG_MOUSE_PS2_SENTELIC is not set
 # CONFIG_MOUSE_PS2_TOUCHKIT is not set
 # CONFIG_MOUSE_SERIAL is not set
 # CONFIG_MOUSE_APPLETOUCH is not set
@@ -418,6 +516,7 @@ CONFIG_SERIO=y
 CONFIG_SERIO_SERPORT=y
 CONFIG_SERIO_LIBPS2=y
 # CONFIG_SERIO_RAW is not set
+# CONFIG_SERIO_ALTERA_PS2 is not set
 # CONFIG_GAMEPORT is not set
 
 #
@@ -444,11 +543,16 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 # Non-8250 serial port support
 #
 CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS=3
+CONFIG_SERIAL_SAMSUNG_UARTS_4=y
+CONFIG_SERIAL_SAMSUNG_UARTS=4
 # CONFIG_SERIAL_SAMSUNG_DEBUG is not set
 CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_S3C6400=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -461,6 +565,7 @@ CONFIG_HW_RANDOM=y
 # CONFIG_TCG_TPM is not set
 CONFIG_I2C=y
 CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_COMPAT=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_HELPER_AUTO=y
 
@@ -471,9 +576,11 @@ CONFIG_I2C_HELPER_AUTO=y
 #
 # I2C system bus drivers (mostly embedded / system-on-chip)
 #
+# CONFIG_I2C_DESIGNWARE is not set
 # CONFIG_I2C_GPIO is not set
 # CONFIG_I2C_OCORES is not set
 # CONFIG_I2C_SIMTEC is not set
+# CONFIG_I2C_XILINX is not set
 
 #
 # External I2C/SMBus adapter drivers
@@ -486,20 +593,15 @@ CONFIG_I2C_HELPER_AUTO=y
 #
 # CONFIG_I2C_PCA_PLATFORM is not set
 # CONFIG_I2C_STUB is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_DS1682 is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_PCF8575 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
 # CONFIG_I2C_DEBUG_ALGO is not set
 # CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
 # CONFIG_SPI is not set
+
+#
+# PPS support
+#
+# CONFIG_PPS is not set
 CONFIG_ARCH_REQUIRE_GPIOLIB=y
 CONFIG_GPIOLIB=y
 # CONFIG_DEBUG_GPIO is not set
@@ -508,13 +610,16 @@ CONFIG_GPIOLIB=y
 #
 # Memory mapped GPIO expanders:
 #
+# CONFIG_GPIO_IT8761E is not set
 
 #
 # I2C GPIO expanders:
 #
+# CONFIG_GPIO_MAX7300 is not set
 # CONFIG_GPIO_MAX732X is not set
 # CONFIG_GPIO_PCA953X is not set
 # CONFIG_GPIO_PCF857X is not set
+# CONFIG_GPIO_ADP5588 is not set
 
 #
 # PCI GPIO expanders:
@@ -523,10 +628,19 @@ CONFIG_GPIOLIB=y
 #
 # SPI GPIO expanders:
 #
+
+#
+# AC97 GPIO expanders:
+#
 # CONFIG_W1 is not set
 # CONFIG_POWER_SUPPLY is not set
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Native drivers
+#
 # CONFIG_SENSORS_AD7414 is not set
 # CONFIG_SENSORS_AD7418 is not set
 # CONFIG_SENSORS_ADM1021 is not set
@@ -535,10 +649,11 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADM1029 is not set
 # CONFIG_SENSORS_ADM1031 is not set
 # CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7411 is not set
 # CONFIG_SENSORS_ADT7462 is not set
 # CONFIG_SENSORS_ADT7470 is not set
-# CONFIG_SENSORS_ADT7473 is not set
 # CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_ASC7621 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_F71805F is not set
@@ -549,6 +664,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_IT87 is not set
 # CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM73 is not set
 # CONFIG_SENSORS_LM75 is not set
 # CONFIG_SENSORS_LM77 is not set
 # CONFIG_SENSORS_LM78 is not set
@@ -573,8 +689,10 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_SMSC47M192 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_AMC6821 is not set
 # CONFIG_SENSORS_THMC50 is not set
 # CONFIG_SENSORS_TMP401 is not set
+# CONFIG_SENSORS_TMP421 is not set
 # CONFIG_SENSORS_VT1211 is not set
 # CONFIG_SENSORS_W83781D is not set
 # CONFIG_SENSORS_W83791D is not set
@@ -584,9 +702,8 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_W83L786NG is not set
 # CONFIG_SENSORS_W83627HF is not set
 # CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_SENSORS_LIS3_I2C is not set
 # CONFIG_THERMAL is not set
-# CONFIG_THERMAL_HWMON is not set
 # CONFIG_WATCHDOG is not set
 CONFIG_SSB_POSSIBLE=y
 
@@ -599,10 +716,12 @@ CONFIG_SSB_POSSIBLE=y
 # Multifunction device drivers
 #
 # CONFIG_MFD_CORE is not set
+# CONFIG_MFD_88PM860X is not set
 # CONFIG_MFD_SM501 is not set
 # CONFIG_MFD_ASIC3 is not set
 # CONFIG_HTC_EGPIO is not set
 # CONFIG_HTC_PASIC3 is not set
+# CONFIG_HTC_I2CPLD is not set
 # CONFIG_TPS65010 is not set
 # CONFIG_TWL4030_CORE is not set
 # CONFIG_MFD_TMIO is not set
@@ -610,10 +729,15 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_MFD_TC6387XB is not set
 # CONFIG_MFD_TC6393XB is not set
 # CONFIG_PMIC_DA903X is not set
+# CONFIG_PMIC_ADP5520 is not set
+# CONFIG_MFD_MAX8925 is not set
 # CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM831X is not set
 # CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_WM8994 is not set
 # CONFIG_MFD_PCF50633 is not set
 # CONFIG_AB3100_CORE is not set
+# CONFIG_REGULATOR is not set
 # CONFIG_MEDIA_SUPPORT is not set
 
 #
@@ -637,7 +761,6 @@ CONFIG_DUMMY_CONSOLE=y
 # CONFIG_SOUND is not set
 CONFIG_HID_SUPPORT=y
 CONFIG_HID=y
-CONFIG_HID_DEBUG=y
 # CONFIG_HIDRAW is not set
 # CONFIG_HID_PID is not set
 
@@ -680,13 +803,12 @@ CONFIG_SDIO_UART=y
 CONFIG_MMC_SDHCI=y
 # CONFIG_MMC_SDHCI_PLTFM is not set
 # CONFIG_MEMSTICK is not set
-# CONFIG_ACCESSIBILITY is not set
 # CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
 CONFIG_RTC_LIB=y
 # CONFIG_RTC_CLASS is not set
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
-# CONFIG_REGULATOR is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 
@@ -710,6 +832,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
@@ -758,6 +881,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
+# CONFIG_LOGFS is not set
 CONFIG_CRAMFS=y
 # CONFIG_SQUASHFS is not set
 # CONFIG_VXFS_FS is not set
@@ -772,7 +896,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y
 CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 
 #
 # Partition Types
@@ -789,6 +912,7 @@ CONFIG_ENABLE_WARN_DEPRECATED=y
 CONFIG_ENABLE_MUST_CHECK=y
 CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
+# CONFIG_STRIP_ASM_SYMS is not set
 # CONFIG_UNUSED_SYMBOLS is not set
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
@@ -826,11 +950,13 @@ CONFIG_DEBUG_MEMORY_INIT=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
@@ -839,6 +965,7 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -849,6 +976,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -857,8 +985,9 @@ CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 CONFIG_DEBUG_LL=y
+# CONFIG_EARLY_PRINTK is not set
 # CONFIG_DEBUG_ICEDCC is not set
-CONFIG_DEBUG_S3C_PORT=y
+# CONFIG_OC_ETM is not set
 CONFIG_DEBUG_S3C_UART=0
 
 #
@@ -867,7 +996,11 @@ CONFIG_DEBUG_S3C_UART=0
 # CONFIG_KEYS is not set
 # CONFIG_SECURITY is not set
 # CONFIG_SECURITYFS is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_DEFAULT_SECURITY_SELINUX is not set
+# CONFIG_DEFAULT_SECURITY_SMACK is not set
+# CONFIG_DEFAULT_SECURITY_TOMOYO is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
 # CONFIG_CRYPTO is not set
 # CONFIG_BINARY_PRINTF is not set
 
@@ -884,8 +1017,10 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
+CONFIG_LZO_DECOMPRESS=y
 CONFIG_DECOMPRESS_GZIP=y
 CONFIG_DECOMPRESS_BZIP2=y
 CONFIG_DECOMPRESS_LZMA=y
+CONFIG_DECOMPRESS_LZO=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_DMA=y
index 796cb78..c4de360 100644
@@ -1,11 +1,13 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:18:21 2010
+# Wed May 26 19:04:37 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -35,6 +37,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -180,9 +183,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -218,7 +223,7 @@ CONFIG_MMU=y
 # CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_S5P6440 is not set
 # CONFIG_ARCH_S5P6442 is not set
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 CONFIG_ARCH_S5PV210=y
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -227,6 +232,7 @@ CONFIG_ARCH_S5PV210=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -242,16 +248,22 @@ CONFIG_SAMSUNG_GPIOLIB_4BIT=y
 CONFIG_S3C_GPIO_CFG_S3C24XX=y
 CONFIG_S3C_GPIO_CFG_S3C64XX=y
 CONFIG_S3C_GPIO_PULL_UPDOWN=y
+CONFIG_S5P_GPIO_DRVSTR=y
 CONFIG_SAMSUNG_GPIO_EXTRA=0
 CONFIG_S3C_GPIO_SPACE=0
 CONFIG_S3C_GPIO_TRACK=y
 # CONFIG_S3C_ADC is not set
+CONFIG_S3C_DEV_WDT=y
+CONFIG_S3C_PL330_DMA=y
 
 #
 # Power management
 #
 CONFIG_PLAT_S5P=y
+CONFIG_S5P_EXT_INT=y
 CONFIG_CPU_S5PV210=y
+# CONFIG_MACH_AQUILA is not set
+# CONFIG_MACH_GONI is not set
 # CONFIG_MACH_SMDKV210 is not set
 CONFIG_MACH_SMDKC110=y
 
@@ -281,12 +293,14 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_HAS_TLS_REG=y
 CONFIG_ARM_L1_CACHE_SHIFT=6
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_430973 is not set
 # CONFIG_ARM_ERRATA_458693 is not set
 # CONFIG_ARM_ERRATA_460075 is not set
 CONFIG_ARM_VIC=y
 CONFIG_ARM_VIC_NR=2
+CONFIG_PL330=y
 
 #
 # Bus support
@@ -335,6 +349,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x20800000,8M console=ttySAC1,115200 init=/linuxrc"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -481,6 +496,7 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_TOUCHSCREEN=y
 # CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_DYNAPRO is not set
+# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -536,6 +552,8 @@ CONFIG_SERIAL_S5PV210=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -583,6 +601,7 @@ CONFIG_GPIOLIB=y
 # CONFIG_HWMON is not set
 # CONFIG_THERMAL is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_HAVE_S3C2410_WATCHDOG=y
 CONFIG_SSB_POSSIBLE=y
 
 #
@@ -635,10 +654,6 @@ CONFIG_RTC_LIB=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -847,6 +862,8 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -857,6 +874,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index 6831dab..e2f5bce 100644
@@ -1,11 +1,13 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.34
-# Sat May 22 03:18:22 2010
+# Wed May 26 19:04:39 2010
 #
 CONFIG_ARM=y
 CONFIG_SYS_SUPPORTS_APM_EMULATION=y
 CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_HAVE_PROC_CPU=y
 CONFIG_NO_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -35,6 +37,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
 CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_LZMA=y
 CONFIG_HAVE_KERNEL_LZO=y
 CONFIG_KERNEL_GZIP=y
 # CONFIG_KERNEL_BZIP2 is not set
@@ -180,9 +183,11 @@ CONFIG_MMU=y
 # CONFIG_ARCH_INTEGRATOR is not set
 # CONFIG_ARCH_REALVIEW is not set
 # CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_VEXPRESS is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_BCMRING is not set
 # CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_CNS3XXX is not set
 # CONFIG_ARCH_GEMINI is not set
 # CONFIG_ARCH_EBSA110 is not set
 # CONFIG_ARCH_EP93XX is not set
@@ -218,7 +223,7 @@ CONFIG_MMU=y
 # CONFIG_ARCH_S3C64XX is not set
 # CONFIG_ARCH_S5P6440 is not set
 # CONFIG_ARCH_S5P6442 is not set
-# CONFIG_ARCH_S5PC1XX is not set
+# CONFIG_ARCH_S5PC100 is not set
 CONFIG_ARCH_S5PV210=y
 # CONFIG_ARCH_SHARK is not set
 # CONFIG_ARCH_LH7A40X is not set
@@ -227,6 +232,7 @@ CONFIG_ARCH_S5PV210=y
 # CONFIG_ARCH_NOMADIK is not set
 # CONFIG_ARCH_DAVINCI is not set
 # CONFIG_ARCH_OMAP is not set
+# CONFIG_PLAT_SPEAR is not set
 CONFIG_PLAT_SAMSUNG=y
 
 #
@@ -242,16 +248,24 @@ CONFIG_SAMSUNG_GPIOLIB_4BIT=y
 CONFIG_S3C_GPIO_CFG_S3C24XX=y
 CONFIG_S3C_GPIO_CFG_S3C64XX=y
 CONFIG_S3C_GPIO_PULL_UPDOWN=y
+CONFIG_S5P_GPIO_DRVSTR=y
 CONFIG_SAMSUNG_GPIO_EXTRA=0
 CONFIG_S3C_GPIO_SPACE=0
 CONFIG_S3C_GPIO_TRACK=y
 # CONFIG_S3C_ADC is not set
+CONFIG_S3C_DEV_WDT=y
+CONFIG_SAMSUNG_DEV_ADC=y
+CONFIG_SAMSUNG_DEV_TS=y
+CONFIG_S3C_PL330_DMA=y
 
 #
 # Power management
 #
 CONFIG_PLAT_S5P=y
+CONFIG_S5P_EXT_INT=y
 CONFIG_CPU_S5PV210=y
+# CONFIG_MACH_AQUILA is not set
+# CONFIG_MACH_GONI is not set
 CONFIG_MACH_SMDKV210=y
 # CONFIG_MACH_SMDKC110 is not set
 
@@ -281,12 +295,14 @@ CONFIG_ARM_THUMB=y
 # CONFIG_CPU_BPREDICT_DISABLE is not set
 CONFIG_HAS_TLS_REG=y
 CONFIG_ARM_L1_CACHE_SHIFT=6
+CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 CONFIG_CPU_HAS_PMU=y
 # CONFIG_ARM_ERRATA_430973 is not set
 # CONFIG_ARM_ERRATA_458693 is not set
 # CONFIG_ARM_ERRATA_460075 is not set
 CONFIG_ARM_VIC=y
 CONFIG_ARM_VIC_NR=2
+CONFIG_PL330=y
 
 #
 # Bus support
@@ -335,6 +351,7 @@ CONFIG_ALIGNMENT_TRAP=y
 CONFIG_ZBOOT_ROM_TEXT=0
 CONFIG_ZBOOT_ROM_BSS=0
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x20800000,8M console=ttySAC1,115200 init=/linuxrc"
+# CONFIG_CMDLINE_FORCE is not set
 # CONFIG_XIP_KERNEL is not set
 # CONFIG_KEXEC is not set
 
@@ -481,7 +498,9 @@ CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_TOUCHSCREEN=y
 # CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_DYNAPRO is not set
+# CONFIG_TOUCHSCREEN_HAMPSHIRE is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_S3C2410 is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
 # CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
@@ -536,6 +555,8 @@ CONFIG_SERIAL_S5PV210=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_TIMBERDALE is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 CONFIG_LEGACY_PTYS=y
@@ -583,6 +604,7 @@ CONFIG_GPIOLIB=y
 # CONFIG_HWMON is not set
 # CONFIG_THERMAL is not set
 # CONFIG_WATCHDOG is not set
+CONFIG_HAVE_S3C2410_WATCHDOG=y
 CONFIG_SSB_POSSIBLE=y
 
 #
@@ -635,10 +657,6 @@ CONFIG_RTC_LIB=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -847,6 +865,8 @@ CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_TRACING_SUPPORT=y
 CONFIG_FTRACE=y
 # CONFIG_FUNCTION_TRACER is not set
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_PREEMPT_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_ENABLE_DEFAULT_TRACERS is not set
 # CONFIG_BOOT_TRACER is not set
@@ -857,6 +877,7 @@ CONFIG_BRANCH_PROFILE_NONE=y
 # CONFIG_KMEMTRACE is not set
 # CONFIG_WORKQUEUE_TRACER is not set
 # CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
index bcda59f..2f87870 100644
@@ -3,9 +3,6 @@
 
 #include <asm/memory.h>
 #include <asm/types.h>
-
 #include <asm-generic/scatterlist.h>
 
-#undef ARCH_HAS_SG_CHAIN
-
 #endif /* _ASMARM_SCATTERLIST_H */
index 5a85e24..d4f1e96 100644
@@ -22,6 +22,9 @@ struct davinci_mmc_config {
 
        /* Version of the MMC/SD controller */
        u8      version;
+
+       /* Number of sg segments */
+       u8      nr_sg;
 };
 void davinci_setup_mmc(int module, struct davinci_mmc_config *config);
 
index e7d629b..f474a80 100644
@@ -137,9 +137,7 @@ static void ads7846_dev_init(void)
        }
 
        gpio_direction_input(ts_gpio);
-
-       omap_set_gpio_debounce(ts_gpio, 1);
-       omap_set_gpio_debounce_time(ts_gpio, 0xa);
+       gpio_set_debounce(ts_gpio, 310);
 }
 
 static int ads7846_get_pendown_state(void)
index 5fcb52e..fefd7e6 100644
@@ -209,8 +209,7 @@ static void ads7846_dev_init(void)
        }
 
        gpio_direction_input(ts_gpio);
-       omap_set_gpio_debounce(ts_gpio, 1);
-       omap_set_gpio_debounce_time(ts_gpio, 0xa);
+       gpio_set_debounce(ts_gpio, 310);
 }
 
 static int ads7846_get_pendown_state(void)
index 81bba19..b952610 100644
@@ -579,9 +579,7 @@ static void ads7846_dev_init(void)
                printk(KERN_ERR "can't get ads7846 pen down GPIO\n");
 
        gpio_direction_input(OMAP3_EVM_TS_GPIO);
-
-       omap_set_gpio_debounce(OMAP3_EVM_TS_GPIO, 1);
-       omap_set_gpio_debounce_time(OMAP3_EVM_TS_GPIO, 0xa);
+       gpio_set_debounce(OMAP3_EVM_TS_GPIO, 310);
 }
 
 static int ads7846_get_pendown_state(void)
index 395d049..db06dc9 100644
@@ -130,8 +130,8 @@ static struct platform_device pandora_keys_gpio = {
 static void __init pandora_keys_gpio_init(void)
 {
        /* set debounce time for GPIO banks 4 and 6 */
-       omap_set_gpio_debounce_time(32 * 3, GPIO_DEBOUNCE_TIME);
-       omap_set_gpio_debounce_time(32 * 5, GPIO_DEBOUNCE_TIME);
+       gpio_set_debounce(32 * 3, GPIO_DEBOUNCE_TIME);
+       gpio_set_debounce(32 * 5, GPIO_DEBOUNCE_TIME);
 }
 
 static int board_keymap[] = {
index 2504d41..2f5f823 100644
@@ -328,8 +328,7 @@ static void __init omap3_ads7846_init(void)
        }
 
        gpio_direction_input(OMAP3_TS_GPIO);
-       omap_set_gpio_debounce(OMAP3_TS_GPIO, 1);
-       omap_set_gpio_debounce_time(OMAP3_TS_GPIO, 0xa);
+       gpio_set_debounce(OMAP3_TS_GPIO, 310);
 }
 
 static struct ads7846_platform_data ads7846_config = {
index a4b9b40..911854d 100644
 #define S5P_IRQ_EINT_BASE      (S5P_IRQ_VIC1(31) + 6)
 
 #define S5P_EINT(x)            ((x) + S5P_IRQ_EINT_BASE)
-#define IRQ_EINT(x)            S5P_EINT(x)
+
+#define S5P_EINT_BASE1         (S5P_IRQ_EINT_BASE)
+/*
+ * S5P6440 has 0-15 external interrupts in group 0. Only these can be used
+ * to wake up from sleep. If request is beyond this range, by mistake, a large
+ * return value for an irq number should be indication of something amiss.
+ */
+#define S5P_EINT_BASE2         (0xf0000000)
 
 /*
  * Next the external interrupt groups. These are similar to the IRQ_EINT(x)
index da66580..02c2374 100644
@@ -77,8 +77,9 @@
 
 #define S5P_IRQ_EINT_BASE      (IRQ_VIC_END + 1)
 
-#define IRQ_EINT(x)             ((x) < 16 ? S5P_IRQ_VIC0(x) : \
-                                       (S5P_IRQ_EINT_BASE + (x)-16))
+#define S5P_EINT_BASE1         (S5P_IRQ_VIC0(0))
+#define S5P_EINT_BASE2         (S5P_IRQ_EINT_BASE)
+
 /* Set the default NR_IRQS */
 
 #define NR_IRQS                (IRQ_EINT(31) + 1)
index 15066df..28aa551 100644
 #define S5P_EINT_BASE1         (S5P_IRQ_VIC0(0))
 #define S5P_EINT_BASE2         (IRQ_VIC_END + 1)
 
-#define IRQ_EINT(x)            ((x) < 16 ? S5P_IRQ_VIC0(x) : \
-                                (S5P_EINT_BASE2 + (x) - 16))
-
 #define S3C_IRQ_GPIO_BASE      (IRQ_EINT(31) + 1)
 #define S3C_IRQ_GPIO(x)                (S3C_IRQ_GPIO_BASE + (x))
 
index 763edeb..dd6295e 100644
 #define S5PC100EINT30PEND              (S5P_VA_GPIO + 0xF40)
 #define S5P_EINT_PEND(x)               (S5PC100EINT30PEND + ((x) * 0x4))
 
-#define eint_offset(irq) ((irq) < IRQ_EINT16_31 ? ((irq) - IRQ_EINT(0)) : \
-                         (((irq) - S5P_EINT_BASE2)))
+#define EINT_REG_NR(x)                 (EINT_OFFSET(x) >> 3)
 
-#define EINT_REG_NR(x)                 (eint_offset(x) >> 3)
-
-#define eint_irq_to_bit(irq)           (1 << (eint_offset(irq) & 0x7))
+#define eint_irq_to_bit(irq)           (1 << (EINT_OFFSET(irq) & 0x7))
 
 /* values for S5P_EXTINT0 */
 #define S5P_EXTINT_LOWLEV              (0x00)
index 92fc6c7..9689537 100644
 #define IRQ_MDNIE3             S5P_IRQ_VIC3(8)
 #define IRQ_VIC_END            S5P_IRQ_VIC3(31)
 
-#define S5P_EINT_16_31_BASE    (IRQ_VIC_END + 1)
-
-#define EINT_MODE              S3C_GPIO_SFN(0xf)
-
-#define IRQ_EINT(x)            ((x) < 16 ? ((x) + S5P_IRQ_VIC0(0)) \
-                                       : ((x) + S5P_EINT_16_31_BASE))
+#define S5P_EINT_BASE1         (S5P_IRQ_VIC0(0))
+#define S5P_EINT_BASE2         (IRQ_VIC_END + 1)
 
 /* Set the default NR_IRQS */
-
 #define NR_IRQS                        (IRQ_EINT(31) + 1)
 
-#define EINT_GPIO_0(x)         S5PV210_GPH0(x)
-#define EINT_GPIO_1(x)         S5PV210_GPH1(x)
-#define EINT_GPIO_2(x)         S5PV210_GPH2(x)
-#define EINT_GPIO_3(x)         S5PV210_GPH3(x)
-
 /* Compatibility */
 #define IRQ_LCD_FIFO           IRQ_LCD0
 #define IRQ_LCD_VSYNC          IRQ_LCD1
index 6d06809..49e029b 100644
 #define S5PV210_EINT30PEND             (S5P_VA_GPIO + 0xF40)
 #define S5P_EINT_PEND(x)               (S5PV210_EINT30PEND + ((x) * 0x4))
 
-#define eint_offset(irq)       ((irq) < IRQ_EINT16_31 ? ((irq) - IRQ_EINT(0)) \
-                                               : ((irq) - S5P_EINT_16_31_BASE))
+#define EINT_REG_NR(x)                 (EINT_OFFSET(x) >> 3)
 
-#define EINT_REG_NR(x)                 (eint_offset(x) >> 3)
-
-#define eint_irq_to_bit(irq)           (1 << (eint_offset(irq) & 0x7))
+#define eint_irq_to_bit(irq)           (1 << (EINT_OFFSET(irq) & 0x7))
 
 /* values for S5P_EXTINT0 */
 #define S5P_EXTINT_LOWLEV              (0x00)
 #define S5P_EXTINT_RISEEDGE            (0x03)
 #define S5P_EXTINT_BOTHEDGE            (0x04)
 
+#define EINT_MODE              S3C_GPIO_SFN(0xf)
+
+#define EINT_GPIO_0(x)         S5PV210_GPH0(x)
+#define EINT_GPIO_1(x)         S5PV210_GPH1(x)
+#define EINT_GPIO_2(x)         S5PV210_GPH2(x)
+#define EINT_GPIO_3(x)         S5PV210_GPH3(x)
+
 #endif /* __ASM_ARCH_REGS_GPIO_H */
index dc2ac42..393e921 100644 (file)
@@ -624,79 +624,58 @@ do {      \
        __raw_writel(l, base + reg); \
 } while(0)
 
-void omap_set_gpio_debounce(int gpio, int enable)
+/**
+ * _set_gpio_debounce - low level gpio debounce time
+ * @bank: the gpio bank we're acting upon
+ * @gpio: the gpio number on this @bank
+ * @debounce: debounce time to use
+ *
+ * OMAP's debounce time is in 31us steps, so the requested time is
+ * converted and clamped to the closest supported value.
+ */
+static void _set_gpio_debounce(struct gpio_bank *bank, unsigned gpio,
+               unsigned debounce)
 {
-       struct gpio_bank *bank;
-       void __iomem *reg;
-       unsigned long flags;
-       u32 val, l = 1 << get_gpio_index(gpio);
+       void __iomem            *reg = bank->base;
+       u32                     val;
+       u32                     l;
+
+       if (debounce < 32)
+               debounce = 0x01;
+       else if (debounce > 7936)
+               debounce = 0xff;
+       else
+               debounce = (debounce / 0x1f) - 1;
 
-       if (cpu_class_is_omap1())
-               return;
+       l = 1 << get_gpio_index(gpio);
 
-       bank = get_gpio_bank(gpio);
-       reg = bank->base;
+       if (cpu_is_omap44xx())
+               reg += OMAP4_GPIO_DEBOUNCINGTIME;
+       else
+               reg += OMAP24XX_GPIO_DEBOUNCE_VAL;
+
+       __raw_writel(debounce, reg);
 
+       reg = bank->base;
        if (cpu_is_omap44xx())
                reg += OMAP4_GPIO_DEBOUNCENABLE;
        else
                reg += OMAP24XX_GPIO_DEBOUNCE_EN;
 
-       if (!(bank->mod_usage & l)) {
-               printk(KERN_ERR "GPIO %d not requested\n", gpio);
-               return;
-       }
-
-       spin_lock_irqsave(&bank->lock, flags);
        val = __raw_readl(reg);
 
-       if (enable && !(val & l))
+       if (debounce) {
                val |= l;
-       else if (!enable && (val & l))
-               val &= ~l;
-       else
-               goto done;
-
-       if (cpu_is_omap34xx() || cpu_is_omap44xx()) {
-               bank->dbck_enable_mask = val;
-               if (enable)
+               if (cpu_is_omap34xx() || cpu_is_omap44xx())
                        clk_enable(bank->dbck);
-               else
+       } else {
+               val &= ~l;
+               if (cpu_is_omap34xx() || cpu_is_omap44xx())
                        clk_disable(bank->dbck);
        }
 
        __raw_writel(val, reg);
-done:
-       spin_unlock_irqrestore(&bank->lock, flags);
 }
-EXPORT_SYMBOL(omap_set_gpio_debounce);
-
-void omap_set_gpio_debounce_time(int gpio, int enc_time)
-{
-       struct gpio_bank *bank;
-       void __iomem *reg;
-
-       if (cpu_class_is_omap1())
-               return;
-
-       bank = get_gpio_bank(gpio);
-       reg = bank->base;
-
-       if (!bank->mod_usage) {
-               printk(KERN_ERR "GPIO not requested\n");
-               return;
-       }
-
-       enc_time &= 0xff;
-
-       if (cpu_is_omap44xx())
-               reg += OMAP4_GPIO_DEBOUNCINGTIME;
-       else
-               reg += OMAP24XX_GPIO_DEBOUNCE_VAL;
-
-       __raw_writel(enc_time, reg);
-}
-EXPORT_SYMBOL(omap_set_gpio_debounce_time);
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
 static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio,
@@ -1656,6 +1635,20 @@ static int gpio_output(struct gpio_chip *chip, unsigned offset, int value)
        return 0;
 }
 
+static int gpio_debounce(struct gpio_chip *chip, unsigned offset,
+               unsigned debounce)
+{
+       struct gpio_bank *bank;
+       unsigned long flags;
+
+       bank = container_of(chip, struct gpio_bank, chip);
+       spin_lock_irqsave(&bank->lock, flags);
+       _set_gpio_debounce(bank, offset, debounce);
+       spin_unlock_irqrestore(&bank->lock, flags);
+
+       return 0;
+}
+
 static void gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
        struct gpio_bank *bank;
@@ -1909,6 +1902,7 @@ static int __init _omap_gpio_init(void)
                bank->chip.direction_input = gpio_input;
                bank->chip.get = gpio_get;
                bank->chip.direction_output = gpio_output;
+               bank->chip.set_debounce = gpio_debounce;
                bank->chip.set = gpio_set;
                bank->chip.to_irq = gpio_2irq;
                if (bank_is_mpuio(bank)) {
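
A short worked example of the clamping and 31us encoding performed by _set_gpio_debounce() above (illustration only; the values follow directly from the code):

static unsigned omap_debounce_enc(unsigned usec)	/* mirror of the logic above */
{
	if (usec < 32)
		return 0x01;		/* minimum, ~62 us: (enc + 1) * 31 */
	if (usec > 7936)
		return 0xff;		/* cap: (0xff + 1) * 31 = 7936 us */
	return usec / 0x1f - 1;		/* e.g. 310 us -> 9 */
}
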
index 5cb2dd1..11d6a1b 100644 (file)
@@ -29,3 +29,4 @@ config S5P_EXT_INT
        bool
        help
 	  Use the external interrupts (other than GPIO interrupts).
+         Note: Do not choose this for S5P6440.
index 9ff3d71..3fb3a3a 100644 (file)
 #define IRQ_TIMER3             S5P_TIMER_IRQ(3)
 #define IRQ_TIMER4             S5P_TIMER_IRQ(4)
 
+#define IRQ_EINT(x)            ((x) < 16 ? ((x) + S5P_EINT_BASE1) \
+                                       : ((x) - 16 + S5P_EINT_BASE2))
+
+#define EINT_OFFSET(irq)       ((irq) < S5P_EINT_BASE2 ? \
+                                               ((irq) - S5P_EINT_BASE1) : \
+                                               ((irq) + 16 - S5P_EINT_BASE2))
+
 #endif /* __ASM_PLAT_S5P_IRQS_H */
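
IRQ_EINT() and EINT_OFFSET() are exact inverses once the per-SoC bases are defined. A worked example, assuming the S5PC100/S5PV210 bases shown earlier (VIC0 for eints 0-15, IRQ_VIC_END + 1 for 16-31):

	/* IRQ_EINT(5)  = 5 + S5P_EINT_BASE1        -> EINT_OFFSET() == 5  */
	/* IRQ_EINT(20) = 20 - 16 + S5P_EINT_BASE2  -> EINT_OFFSET() == 20 */

On S5P6440, where S5P_EINT_BASE2 is the 0xf0000000 sentinel, IRQ_EINT(16) and above deliberately yield an absurdly large irq number, flagging the unsupported request.
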
index eaa70aa..e56c807 100644 (file)
@@ -60,7 +60,7 @@ static void s5p_irq_eint_maskack(unsigned int irq)
 
 static int s5p_irq_eint_set_type(unsigned int irq, unsigned int type)
 {
-       int offs = eint_offset(irq);
+       int offs = EINT_OFFSET(irq);
        int shift;
        u32 ctrl, mask;
        u32 newvalue = 0;
@@ -139,17 +139,16 @@ static struct irq_chip s5p_irq_eint = {
  */
 static inline void s5p_irq_demux_eint(unsigned int start)
 {
-       u32 status;
+       u32 status = __raw_readl(S5P_EINT_PEND(EINT_REG_NR(start)));
        u32 mask = __raw_readl(S5P_EINT_MASK(EINT_REG_NR(start)));
        unsigned int irq;
 
-       status = __raw_readl(S5P_EINT_PEND(EINT_REG_NR(start)));
        status &= ~mask;
        status &= 0xff;
 
        while (status) {
-               irq = fls(status);
-               generic_handle_irq(irq - 1 + start);
+               irq = fls(status) - 1;
+               generic_handle_irq(irq + start);
                status &= ~(1 << irq);
        }
 }
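
The hunk above fixes an off-by-one: fls() is 1-based, so the pending bit index is fls(status) - 1. The old code handled the correct interrupt but cleared the wrong bit (1 << fls(status)), which could leave the pending bit set and spin. An illustrative trace of the corrected loop:

	/* status = 0x09 (bits 0 and 3 pending):                */
	/*   fls(0x09) - 1 = 3 -> handle start + 3, clear bit 3 */
	/*   fls(0x01) - 1 = 0 -> handle start + 0, clear bit 0 */
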
@@ -162,12 +161,18 @@ static void s5p_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc)
 
 static inline void s5p_irq_vic_eint_mask(unsigned int irq)
 {
+       void __iomem *base = get_irq_chip_data(irq);
+
        s5p_irq_eint_mask(irq);
+       writel(1 << EINT_OFFSET(irq), base + VIC_INT_ENABLE_CLEAR);
 }
 
 static void s5p_irq_vic_eint_unmask(unsigned int irq)
 {
+       void __iomem *base = get_irq_chip_data(irq);
+
        s5p_irq_eint_unmask(irq);
+       writel(1 << EINT_OFFSET(irq), base + VIC_INT_ENABLE);
 }
 
 static inline void s5p_irq_vic_eint_ack(unsigned int irq)
index 377320e..06394e5 100644 (file)
@@ -1,25 +1,7 @@
 #ifndef __ASM_AVR32_SCATTERLIST_H
 #define __ASM_AVR32_SCATTERLIST_H
 
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-   unsigned long       sg_magic;
-#endif
-    unsigned long      page_link;
-    unsigned int       offset;
-    dma_addr_t         dma_address;
-    unsigned int       length;
-};
-
-/* These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0xffffffff)
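
Each of the scatterlist conversions in this merge replaces a private struct with the common one. A sketch of what <asm-generic/scatterlist.h> provides at this point (abridged; the dma_length field pairs with the NEED_SG_DMA_LENGTH selects added further down):

struct scatterlist {
#ifdef CONFIG_DEBUG_SG
	unsigned long	sg_magic;
#endif
	unsigned long	page_link;
	unsigned int	offset;
	unsigned int	length;
	dma_addr_t	dma_address;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
	unsigned int	dma_length;	/* for IOMMUs that merge entries */
#endif
};

#define sg_dma_address(sg)	((sg)->dma_address)
#ifdef CONFIG_NEED_SG_DMA_LENGTH
#define sg_dma_len(sg)		((sg)->dma_length)
#else
#define sg_dma_len(sg)		((sg)->length)
#endif
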
 
index 04f4487..64d41d3 100644 (file)
@@ -1,27 +1,7 @@
 #ifndef _BLACKFIN_SCATTERLIST_H
 #define _BLACKFIN_SCATTERLIST_H
 
-#include <linux/mm.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       unsigned long page_link;
-       unsigned int offset;
-       dma_addr_t dma_address;
-       unsigned int length;
-};
-
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)      ((sg)->dma_address)
-#define sg_dma_len(sg)          ((sg)->length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD      (0xffffffff)
 
index 43eb969..6ec7768 100644 (file)
@@ -292,28 +292,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                        break;
                }
 
-#ifdef CONFIG_BINFMT_ELF_FDPIC
-       case PTRACE_GETFDPIC: {
-               unsigned long tmp = 0;
-
-               switch (addr) {
-               case_PTRACE_GETFDPIC_EXEC:
-               case PTRACE_GETFDPIC_EXEC:
-                       tmp = child->mm->context.exec_fdpic_loadmap;
-                       break;
-               case_PTRACE_GETFDPIC_INTERP:
-               case PTRACE_GETFDPIC_INTERP:
-                       tmp = child->mm->context.interp_fdpic_loadmap;
-                       break;
-               default:
-                       break;
-               }
-
-               ret = put_user(tmp, datap);
-               break;
-       }
-#endif
-
                /* when I and D space are separate, this will have to be fixed. */
        case PTRACE_POKEDATA:
                pr_debug("ptrace: PTRACE_PEEKDATA\n");
@@ -357,8 +335,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        case PTRACE_PEEKUSR:
                switch (addr) {
 #ifdef CONFIG_BINFMT_ELF_FDPIC /* backwards compat */
-               case PT_FDPIC_EXEC:   goto case_PTRACE_GETFDPIC_EXEC;
-               case PT_FDPIC_INTERP: goto case_PTRACE_GETFDPIC_INTERP;
+               case PT_FDPIC_EXEC:
+                       request = PTRACE_GETFDPIC;
+                       addr = PTRACE_GETFDPIC_EXEC;
+                       goto case_default;
+               case PT_FDPIC_INTERP:
+                       request = PTRACE_GETFDPIC;
+                       addr = PTRACE_GETFDPIC_INTERP;
+                       goto case_default;
 #endif
                default:
                        ret = get_reg(child, addr, datap);
@@ -385,6 +369,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             0, sizeof(struct pt_regs),
                                             (const void __user *)data);
 
+       case_default:
        default:
                ret = ptrace_request(child, request, addr, data);
                break;
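
Rewriting request/addr and jumping to case_default hands PT_FDPIC_* off to the generic PTRACE_GETFDPIC handler this series adds to kernel/ptrace.c; the frv hunk below drops its private copy for the same reason. A sketch of the generic handler's shape (abridged; note it takes the mm reference the arch copies never did):

#ifdef CONFIG_BINFMT_ELF_FDPIC
	case PTRACE_GETFDPIC: {
		struct mm_struct *mm = get_task_mm(child);
		unsigned long tmp = 0;

		ret = -ESRCH;
		if (!mm)
			break;

		switch (addr) {
		case PTRACE_GETFDPIC_EXEC:
			tmp = mm->context.exec_fdpic_loadmap;
			break;
		case PTRACE_GETFDPIC_INTERP:
			tmp = mm->context.interp_fdpic_loadmap;
			break;
		}
		mmput(mm);

		ret = put_user(tmp, (unsigned long __user *)data);
		break;
	}
#endif
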
index faff53a..249a784 100644 (file)
@@ -1,22 +1,7 @@
 #ifndef __ASM_CRIS_SCATTERLIST_H
 #define __ASM_CRIS_SCATTERLIST_H
 
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       char *  address;    /* Location data is to be transferred to */
-       unsigned int length;
-
-       /* The following is i386 highmem junk - not used by us */
-       unsigned long page_link;
-       unsigned int offset;/* for highmem, page offset */
-
-};
-
-#define sg_dma_address(sg)     ((sg)->address)
-#define sg_dma_len(sg)         ((sg)->length)
-/* i386 junk */
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0x1fffffff)
 
index 4bca8a2..1614bfd 100644 (file)
@@ -1,45 +1,7 @@
 #ifndef _ASM_SCATTERLIST_H
 #define _ASM_SCATTERLIST_H
 
-#include <asm/types.h>
-
-/*
- * Drivers must set either ->address or (preferred) page and ->offset
- * to indicate where data must be transferred to/from.
- *
- * Using page is recommended since it handles highmem data as well as
- * low mem. ->address is restricted to data which has a virtual mapping, and
- * it will go away in the future. Updating to page can be automated very
- * easily -- something like
- *
- * sg->address = some_ptr;
- *
- * can be rewritten as
- *
- * sg_set_buf(sg, some_ptr, length);
- *
- * and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
- */
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long   sg_magic;
-#endif
-       unsigned long   page_link;
-       unsigned int    offset;         /* for highmem, page offset */
-
-       dma_addr_t      dma_address;
-       unsigned int    length;
-};
-
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0xffffffffUL)
 
index 60eeed3..fac0289 100644 (file)
@@ -344,26 +344,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             0, sizeof(child->thread.user->f),
                                             (const void __user *)data);
 
-       case PTRACE_GETFDPIC:
-               tmp = 0;
-               switch (addr) {
-               case PTRACE_GETFDPIC_EXEC:
-                       tmp = child->mm->context.exec_fdpic_loadmap;
-                       break;
-               case PTRACE_GETFDPIC_INTERP:
-                       tmp = child->mm->context.interp_fdpic_loadmap;
-                       break;
-               default:
-                       break;
-               }
-
-               ret = 0;
-               if (put_user(tmp, (unsigned long *) data)) {
-                       ret = -EFAULT;
-                       break;
-               }
-               break;
-
        default:
                ret = ptrace_request(child, request, addr, data);
                break;
index 71abd15..6c155d6 100644 (file)
@@ -46,8 +46,9 @@ static void frv_change_dcache_mode(unsigned long newmode)
 /*
  * handle requests to dynamically switch the write caching mode delivered by /proc
  */
-static int procctl_frv_cachemode(ctl_table *table, int write, struct file *filp,
-                                void __user *buffer, size_t *lenp, loff_t *ppos)
+static int procctl_frv_cachemode(ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp,
+                                loff_t *ppos)
 {
        unsigned long hsr0;
        char buff[8];
@@ -84,7 +85,7 @@ static int procctl_frv_cachemode(ctl_table *table, int write, struct file *filp,
        }
 
        /* read the state */
-       if (filp->f_pos > 0) {
+       if (*ppos > 0) {
                *lenp = 0;
                return 0;
        }
@@ -110,7 +111,7 @@ static int procctl_frv_cachemode(ctl_table *table, int write, struct file *filp,
                return -EFAULT;
 
        *lenp = len;
-       filp->f_pos = len;
+       *ppos = len;
        return 0;
 
 } /* end procctl_frv_cachemode() */
@@ -120,8 +121,9 @@ static int procctl_frv_cachemode(ctl_table *table, int write, struct file *filp,
  * permit the mm_struct the nominated process is using to have its MMU context ID pinned
  */
 #ifdef CONFIG_MMU
-static int procctl_frv_pin_cxnr(ctl_table *table, int write, struct file *filp,
-                               void __user *buffer, size_t *lenp, loff_t *ppos)
+static int procctl_frv_pin_cxnr(ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp,
+                               loff_t *ppos)
 {
        pid_t pid;
        char buff[16], *p;
@@ -150,7 +152,7 @@ static int procctl_frv_pin_cxnr(ctl_table *table, int write, struct file *filp,
        }
 
        /* read the currently pinned CXN */
-       if (filp->f_pos > 0) {
+       if (*ppos > 0) {
                *lenp = 0;
                return 0;
        }
@@ -163,7 +165,7 @@ static int procctl_frv_pin_cxnr(ctl_table *table, int write, struct file *filp,
                return -EFAULT;
 
        *lenp = len;
-       filp->f_pos = len;
+       *ppos = len;
        return 0;
 
 } /* end procctl_frv_pin_cxnr() */
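
Both handlers now follow the proc_handler prototype without the struct file argument, tracking the read position through the explicit *ppos instead of filp->f_pos. A minimal skeleton under the new signature (handler name hypothetical):

static int procctl_example(ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write) {
		if (*ppos > 0) {	/* one-shot read: EOF afterwards */
			*lenp = 0;
			return 0;
		}
		/* ... fill buffer via copy_to_user(), set len ... */
		*ppos = *lenp;		/* advance position explicitly */
	}
	return 0;
}
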
index d3ecdd8..de08a4a 100644 (file)
@@ -1,17 +1,7 @@
 #ifndef _H8300_SCATTERLIST_H
 #define _H8300_SCATTERLIST_H
 
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long   sg_magic;
-#endif
-       unsigned long   page_link;
-       unsigned int    offset;
-       dma_addr_t      dma_address;
-       unsigned int    length;
-};
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD      (0xffffffff)
 
index 9676100..9561082 100644 (file)
@@ -56,6 +56,9 @@ config MMU
 config NEED_DMA_MAP_STATE
        def_bool y
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config SWIOTLB
        bool
 
@@ -495,6 +498,14 @@ config HAVE_ARCH_NODEDATA_EXTENSION
        def_bool y
        depends on NUMA
 
+config USE_PERCPU_NUMA_NODE_ID
+       def_bool y
+       depends on NUMA
+
+config HAVE_MEMORYLESS_NODES
+       def_bool y
+       depends on NUMA
+
 config ARCH_PROC_KCORE_TEXT
        def_bool y
        depends on PROC_KCORE
index d8e9896..f299a4f 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _ASM_IA64_SCATTERLIST_H
 #define _ASM_IA64_SCATTERLIST_H
 
+#include <asm-generic/scatterlist.h>
 /*
  * It used to be that ISA_DMA_THRESHOLD had something to do with the
  * DMA-limits of ISA-devices.  Nowadays, its only remaining use (apart
@@ -10,7 +11,6 @@
  * that's 4GB - 1.
  */
 #define ISA_DMA_THRESHOLD      0xffffffff
-
-#include <asm-generic/scatterlist.h>
+#define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_IA64_SCATTERLIST_H */
index d323071..09f6467 100644 (file)
 #define RECLAIM_DISTANCE 15
 
 /*
- * Returns the number of the node containing CPU 'cpu'
- */
-#define cpu_to_node(cpu) (int)(cpu_to_node_map[cpu])
-
-/*
  * Returns a bitmask of CPUs on Node 'node'.
  */
 #define cpumask_of_node(node) ((node) == -1 ?                          \
index 3095654..d9485d9 100644 (file)
@@ -31,8 +31,6 @@ struct dma_map_ops swiotlb_dma_ops = {
        .unmap_sg = swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
-       .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
-       .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .dma_supported = swiotlb_dma_supported,
index 0dec7f7..7c7909f 100644 (file)
@@ -638,7 +638,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
         */
 
        read_lock(&tasklist_lock);
-       if (child->signal) {
+       if (child->sighand) {
                spin_lock_irq(&child->sighand->siglock);
                if (child->state == TASK_STOPPED &&
                    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
@@ -662,7 +662,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
         * job control stop, so that SIGCONT can be used to wake it up.
         */
        read_lock(&tasklist_lock);
-       if (child->signal) {
+       if (child->sighand) {
                spin_lock_irq(&child->sighand->siglock);
                if (child->state == TASK_TRACED &&
                    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
index e5230b2..518e876 100644 (file)
@@ -390,6 +390,12 @@ smp_callin (void)
 
        fix_b0_for_bsp();
 
+       /*
+        * numa_node_id() works after this.
+        */
+       set_numa_node(cpu_to_node_map[cpuid]);
+       set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
+
        ipi_call_lock_irq();
        spin_lock(&vector_lock);
        /* Setup the per cpu irq handling data structures */
@@ -632,6 +638,7 @@ void __devinit smp_prepare_boot_cpu(void)
 {
        cpu_set(smp_processor_id(), cpu_online_map);
        cpu_set(smp_processor_id(), cpu_callin_map);
+       set_numa_node(cpu_to_node_map[smp_processor_id()]);
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
        paravirt_post_smp_prepare_boot_cpu();
 }
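
These set_numa_node() calls populate the per-cpu mapping that replaces the cpu_to_node_map macro removed from topology.h above. A sketch of the generic side under CONFIG_USE_PERCPU_NUMA_NODE_ID (abridged from the linux/topology.h of this series):

DECLARE_PER_CPU(int, numa_node);

static inline int numa_node_id(void)	/* valid once smp_callin() has run */
{
	return __this_cpu_read(numa_node);
}

static inline int cpu_to_node(int cpu)
{
	return per_cpu(numa_node, cpu);
}

static inline void set_numa_node(int node)
{
	percpu_write(numa_node, node);
}
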
index 1ed372c..aeeddd8 100644 (file)
@@ -1,20 +1,7 @@
 #ifndef _ASM_M32R_SCATTERLIST_H
 #define _ASM_M32R_SCATTERLIST_H
 
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-    unsigned long sg_magic;
-#endif
-    char *  address;    /* Location data is to be transferred to, NULL for
-                         * highmem page */
-    unsigned long page_link;
-    unsigned int offset;/* for highmem, page offset */
-
-    dma_addr_t dma_address;
-    unsigned int length;
-};
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0x1fffffff)
 
index b5da298..2e3737b 100644 (file)
@@ -7,6 +7,7 @@ config M68K
        default y
        select HAVE_AOUT
        select HAVE_IDE
+       select GENERIC_ATOMIC64
 
 config MMU
        bool
index d2cc35d..b1577f7 100644 (file)
@@ -97,10 +97,6 @@ static void amiga_get_model(char *model);
 static void amiga_get_hardware_list(struct seq_file *m);
 /* amiga specific timer functions */
 static unsigned long amiga_gettimeoffset(void);
-static int a3000_hwclk(int, struct rtc_time *);
-static int a2000_hwclk(int, struct rtc_time *);
-static int amiga_set_clock_mmss(unsigned long);
-static unsigned int amiga_get_ss(void);
 extern void amiga_mksound(unsigned int count, unsigned int ticks);
 static void amiga_reset(void);
 extern void amiga_init_sound(void);
@@ -138,10 +134,6 @@ static struct {
        }
 };
 
-static struct resource rtc_resource = {
-       .start = 0x00dc0000, .end = 0x00dcffff
-};
-
 static struct resource ram_resource[NUM_MEMINFO];
 
 
@@ -387,15 +379,6 @@ void __init config_amiga(void)
        mach_get_model       = amiga_get_model;
        mach_get_hardware_list = amiga_get_hardware_list;
        mach_gettimeoffset   = amiga_gettimeoffset;
-       if (AMIGAHW_PRESENT(A3000_CLK)) {
-               mach_hwclk         = a3000_hwclk;
-               rtc_resource.name = "A3000 RTC";
-               request_resource(&iomem_resource, &rtc_resource);
-       } else /* if (AMIGAHW_PRESENT(A2000_CLK)) */ {
-               mach_hwclk         = a2000_hwclk;
-               rtc_resource.name = "A2000 RTC";
-               request_resource(&iomem_resource, &rtc_resource);
-       }
 
        /*
         * default MAX_DMA=0xffffffff on all machines. If we don't do so, the SCSI
@@ -404,8 +387,6 @@ void __init config_amiga(void)
         */
        mach_max_dma_address = 0xffffffff;
 
-       mach_set_clock_mmss  = amiga_set_clock_mmss;
-       mach_get_ss          = amiga_get_ss;
        mach_reset           = amiga_reset;
 #if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
        mach_beep            = amiga_mksound;
@@ -530,161 +511,6 @@ static unsigned long amiga_gettimeoffset(void)
        return ticks + offset;
 }
 
-static int a3000_hwclk(int op, struct rtc_time *t)
-{
-       tod_3000.cntrl1 = TOD3000_CNTRL1_HOLD;
-
-       if (!op) { /* read */
-               t->tm_sec  = tod_3000.second1 * 10 + tod_3000.second2;
-               t->tm_min  = tod_3000.minute1 * 10 + tod_3000.minute2;
-               t->tm_hour = tod_3000.hour1   * 10 + tod_3000.hour2;
-               t->tm_mday = tod_3000.day1    * 10 + tod_3000.day2;
-               t->tm_wday = tod_3000.weekday;
-               t->tm_mon  = tod_3000.month1  * 10 + tod_3000.month2 - 1;
-               t->tm_year = tod_3000.year1   * 10 + tod_3000.year2;
-               if (t->tm_year <= 69)
-                       t->tm_year += 100;
-       } else {
-               tod_3000.second1 = t->tm_sec / 10;
-               tod_3000.second2 = t->tm_sec % 10;
-               tod_3000.minute1 = t->tm_min / 10;
-               tod_3000.minute2 = t->tm_min % 10;
-               tod_3000.hour1   = t->tm_hour / 10;
-               tod_3000.hour2   = t->tm_hour % 10;
-               tod_3000.day1    = t->tm_mday / 10;
-               tod_3000.day2    = t->tm_mday % 10;
-               if (t->tm_wday != -1)
-                       tod_3000.weekday = t->tm_wday;
-               tod_3000.month1  = (t->tm_mon + 1) / 10;
-               tod_3000.month2  = (t->tm_mon + 1) % 10;
-               if (t->tm_year >= 100)
-                       t->tm_year -= 100;
-               tod_3000.year1   = t->tm_year / 10;
-               tod_3000.year2   = t->tm_year % 10;
-       }
-
-       tod_3000.cntrl1 = TOD3000_CNTRL1_FREE;
-
-       return 0;
-}
-
-static int a2000_hwclk(int op, struct rtc_time *t)
-{
-       int cnt = 5;
-
-       tod_2000.cntrl1 = TOD2000_CNTRL1_HOLD;
-
-       while ((tod_2000.cntrl1 & TOD2000_CNTRL1_BUSY) && cnt) {
-               tod_2000.cntrl1 &= ~TOD2000_CNTRL1_HOLD;
-               udelay(70);
-               tod_2000.cntrl1 |= TOD2000_CNTRL1_HOLD;
-               --cnt;
-       }
-
-       if (!cnt)
-               printk(KERN_INFO "hwclk: timed out waiting for RTC (0x%x)\n",
-                       tod_2000.cntrl1);
-
-       if (!op) { /* read */
-               t->tm_sec  = tod_2000.second1     * 10 + tod_2000.second2;
-               t->tm_min  = tod_2000.minute1     * 10 + tod_2000.minute2;
-               t->tm_hour = (tod_2000.hour1 & 3) * 10 + tod_2000.hour2;
-               t->tm_mday = tod_2000.day1        * 10 + tod_2000.day2;
-               t->tm_wday = tod_2000.weekday;
-               t->tm_mon  = tod_2000.month1      * 10 + tod_2000.month2 - 1;
-               t->tm_year = tod_2000.year1       * 10 + tod_2000.year2;
-               if (t->tm_year <= 69)
-                       t->tm_year += 100;
-
-               if (!(tod_2000.cntrl3 & TOD2000_CNTRL3_24HMODE)) {
-                       if (!(tod_2000.hour1 & TOD2000_HOUR1_PM) && t->tm_hour == 12)
-                               t->tm_hour = 0;
-                       else if ((tod_2000.hour1 & TOD2000_HOUR1_PM) && t->tm_hour != 12)
-                               t->tm_hour += 12;
-               }
-       } else {
-               tod_2000.second1 = t->tm_sec / 10;
-               tod_2000.second2 = t->tm_sec % 10;
-               tod_2000.minute1 = t->tm_min / 10;
-               tod_2000.minute2 = t->tm_min % 10;
-               if (tod_2000.cntrl3 & TOD2000_CNTRL3_24HMODE)
-                       tod_2000.hour1 = t->tm_hour / 10;
-               else if (t->tm_hour >= 12)
-                       tod_2000.hour1 = TOD2000_HOUR1_PM +
-                               (t->tm_hour - 12) / 10;
-               else
-                       tod_2000.hour1 = t->tm_hour / 10;
-               tod_2000.hour2   = t->tm_hour % 10;
-               tod_2000.day1    = t->tm_mday / 10;
-               tod_2000.day2    = t->tm_mday % 10;
-               if (t->tm_wday != -1)
-                       tod_2000.weekday = t->tm_wday;
-               tod_2000.month1  = (t->tm_mon + 1) / 10;
-               tod_2000.month2  = (t->tm_mon + 1) % 10;
-               if (t->tm_year >= 100)
-                       t->tm_year -= 100;
-               tod_2000.year1   = t->tm_year / 10;
-               tod_2000.year2   = t->tm_year % 10;
-       }
-
-       tod_2000.cntrl1 &= ~TOD2000_CNTRL1_HOLD;
-
-       return 0;
-}
-
-static int amiga_set_clock_mmss(unsigned long nowtime)
-{
-       short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60;
-
-       if (AMIGAHW_PRESENT(A3000_CLK)) {
-               tod_3000.cntrl1 = TOD3000_CNTRL1_HOLD;
-
-               tod_3000.second1 = real_seconds / 10;
-               tod_3000.second2 = real_seconds % 10;
-               tod_3000.minute1 = real_minutes / 10;
-               tod_3000.minute2 = real_minutes % 10;
-
-               tod_3000.cntrl1 = TOD3000_CNTRL1_FREE;
-       } else /* if (AMIGAHW_PRESENT(A2000_CLK)) */ {
-               int cnt = 5;
-
-               tod_2000.cntrl1 |= TOD2000_CNTRL1_HOLD;
-
-               while ((tod_2000.cntrl1 & TOD2000_CNTRL1_BUSY) && cnt) {
-                       tod_2000.cntrl1 &= ~TOD2000_CNTRL1_HOLD;
-                       udelay(70);
-                       tod_2000.cntrl1 |= TOD2000_CNTRL1_HOLD;
-                       --cnt;
-               }
-
-               if (!cnt)
-                       printk(KERN_INFO "set_clock_mmss: timed out waiting for RTC (0x%x)\n", tod_2000.cntrl1);
-
-               tod_2000.second1 = real_seconds / 10;
-               tod_2000.second2 = real_seconds % 10;
-               tod_2000.minute1 = real_minutes / 10;
-               tod_2000.minute2 = real_minutes % 10;
-
-               tod_2000.cntrl1 &= ~TOD2000_CNTRL1_HOLD;
-       }
-
-       return 0;
-}
-
-static unsigned int amiga_get_ss(void)
-{
-       unsigned int s;
-
-       if (AMIGAHW_PRESENT(A3000_CLK)) {
-               tod_3000.cntrl1 = TOD3000_CNTRL1_HOLD;
-               s = tod_3000.second1 * 10 + tod_3000.second2;
-               tod_3000.cntrl1 = TOD3000_CNTRL1_FREE;
-       } else /* if (AMIGAHW_PRESENT(A2000_CLK)) */ {
-               s = tod_2000.second1 * 10 + tod_2000.second2;
-       }
-       return s;
-}
-
 static NORET_TYPE void amiga_reset(void)
     ATTRIB_NORET;
 
index 38f18bf..7fd8b41 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/zorro.h>
 
 #include <asm/amigahw.h>
+#include <asm/amigayle.h>
 
 
 #ifdef CONFIG_ZORRO
@@ -55,11 +56,77 @@ static int __init amiga_init_bus(void)
 
 subsys_initcall(amiga_init_bus);
 
-#endif /* CONFIG_ZORRO */
+
+static int z_dev_present(zorro_id id)
+{
+       unsigned int i;
+
+       for (i = 0; i < zorro_num_autocon; i++)
+               if (zorro_autocon[i].rom.er_Manufacturer == ZORRO_MANUF(id) &&
+                   zorro_autocon[i].rom.er_Product == ZORRO_PROD(id))
+                       return 1;
+
+       return 0;
+}
+
+#else /* !CONFIG_ZORRO */
+
+static inline int z_dev_present(zorro_id id) { return 0; }
+
+#endif /* !CONFIG_ZORRO */
+
+
+static const struct resource a3000_scsi_resource __initconst = {
+       .start  = 0xdd0000,
+       .end    = 0xdd00ff,
+       .flags  = IORESOURCE_MEM,
+};
+
+
+static const struct resource a4000t_scsi_resource __initconst = {
+       .start  = 0xdd0000,
+       .end    = 0xdd0fff,
+       .flags  = IORESOURCE_MEM,
+};
+
+
+static const struct resource a1200_ide_resource __initconst = {
+       .start  = 0xda0000,
+       .end    = 0xda1fff,
+       .flags  = IORESOURCE_MEM,
+};
+
+static const struct gayle_ide_platform_data a1200_ide_pdata __initconst = {
+       .base           = 0xda0000,
+       .irqport        = 0xda9000,
+       .explicit_ack   = 1,
+};
+
+
+static const struct resource a4000_ide_resource __initconst = {
+       .start  = 0xdd2000,
+       .end    = 0xdd3fff,
+       .flags  = IORESOURCE_MEM,
+};
+
+static const struct gayle_ide_platform_data a4000_ide_pdata __initconst = {
+       .base           = 0xdd2020,
+       .irqport        = 0xdd3020,
+       .explicit_ack   = 0,
+};
+
+
+static const struct resource amiga_rtc_resource __initconst = {
+       .start  = 0x00dc0000,
+       .end    = 0x00dcffff,
+       .flags  = IORESOURCE_MEM,
+};
 
 
 static int __init amiga_init_devices(void)
 {
+       struct platform_device *pdev;
+
        if (!MACH_IS_AMIGA)
                return -ENODEV;
 
@@ -77,6 +144,53 @@ static int __init amiga_init_devices(void)
        if (AMIGAHW_PRESENT(AMI_FLOPPY))
                platform_device_register_simple("amiga-floppy", -1, NULL, 0);
 
+       if (AMIGAHW_PRESENT(A3000_SCSI))
+               platform_device_register_simple("amiga-a3000-scsi", -1,
+                                               &a3000_scsi_resource, 1);
+
+       if (AMIGAHW_PRESENT(A4000_SCSI))
+               platform_device_register_simple("amiga-a4000t-scsi", -1,
+                                               &a4000t_scsi_resource, 1);
+
+       if (AMIGAHW_PRESENT(A1200_IDE) ||
+           z_dev_present(ZORRO_PROD_MTEC_VIPER_MK_V_E_MATRIX_530_SCSI_IDE)) {
+               pdev = platform_device_register_simple("amiga-gayle-ide", -1,
+                                                      &a1200_ide_resource, 1);
+               platform_device_add_data(pdev, &a1200_ide_pdata,
+                                        sizeof(a1200_ide_pdata));
+       }
+
+       if (AMIGAHW_PRESENT(A4000_IDE)) {
+               pdev = platform_device_register_simple("amiga-gayle-ide", -1,
+                                                      &a4000_ide_resource, 1);
+               platform_device_add_data(pdev, &a4000_ide_pdata,
+                                        sizeof(a4000_ide_pdata));
+       }
+
+
+       /* other I/O hardware */
+       if (AMIGAHW_PRESENT(AMI_KEYBOARD))
+               platform_device_register_simple("amiga-keyboard", -1, NULL, 0);
+
+       if (AMIGAHW_PRESENT(AMI_MOUSE))
+               platform_device_register_simple("amiga-mouse", -1, NULL, 0);
+
+       if (AMIGAHW_PRESENT(AMI_SERIAL))
+               platform_device_register_simple("amiga-serial", -1, NULL, 0);
+
+       if (AMIGAHW_PRESENT(AMI_PARALLEL))
+               platform_device_register_simple("amiga-parallel", -1, NULL, 0);
+
+
+       /* real time clocks */
+       if (AMIGAHW_PRESENT(A2000_CLK))
+               platform_device_register_simple("rtc-msm6242", -1,
+                                               &amiga_rtc_resource, 1);
+
+       if (AMIGAHW_PRESENT(A3000_CLK))
+               platform_device_register_simple("rtc-rp5c01", -1,
+                                               &amiga_rtc_resource, 1);
+
        return 0;
 }
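
With registration moved here, the board code no longer drives the clock or IDE hardware itself; the matching drivers recover the resource and platform data through the standard platform API. A minimal probe-side sketch (hypothetical driver, standard calls only):

static int __devinit gayle_ide_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct gayle_ide_platform_data *pdata;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/* platform_device_add_data() handed the device a private copy */
	pdata = pdev->dev.platform_data;
	/* ... use res->start, pdata->base, pdata->irqport, pdata->explicit_ack ... */
	return 0;
}
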
 
index bb5a6aa..a01453d 100644 (file)
@@ -104,4 +104,10 @@ struct GAYLE {
 #define GAYLE_CFG_250NS                0x00
 #define GAYLE_CFG_720NS                0x0c
 
+struct gayle_ide_platform_data {
+       unsigned long base;
+       unsigned long irqport;
+       int explicit_ack;       /* A1200 IDE needs explicit ack */
+};
+
 #endif /* asm-m68k/amigayle.h */
index 8d29145..eab36dc 100644 (file)
@@ -3,3 +3,5 @@
 #else
 #include "atomic_mm.h"
 #endif
+
+#include <asm-generic/atomic64.h>
index fed3fd3..ecafbe1 100644 (file)
@@ -8,4 +8,6 @@
 #define        L1_CACHE_SHIFT  4
 #define        L1_CACHE_BYTES  (1<< L1_CACHE_SHIFT)
 
+#define ARCH_KMALLOC_MINALIGN  L1_CACHE_BYTES
+
 #endif
index e27ad90..175da06 100644 (file)
@@ -1,23 +1,9 @@
 #ifndef _M68K_SCATTERLIST_H
 #define _M68K_SCATTERLIST_H
 
-#include <linux/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       unsigned long page_link;
-       unsigned int offset;
-       unsigned int length;
-
-       dma_addr_t dma_address; /* A place to hang host-specific addresses at. */
-};
+#include <asm-generic/scatterlist.h>
 
 /* This is bogus and should go away. */
 #define ISA_DMA_THRESHOLD (0x00ffffff)
 
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->length)
-
 #endif /* !(_M68K_SCATTERLIST_H) */
index 35d786f..dc4a890 100644 (file)
@@ -1 +1,3 @@
 #include <asm-generic/scatterlist.h>
+
+#define ISA_DMA_THRESHOLD      (~0UL)
index 83d69fe..9af65e7 100644 (file)
@@ -1,27 +1,7 @@
 #ifndef __ASM_SCATTERLIST_H
 #define __ASM_SCATTERLIST_H
 
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long   sg_magic;
-#endif
-       unsigned long   page_link;
-       unsigned int    offset;
-       dma_addr_t      dma_address;
-       unsigned int    length;
-};
-
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0x00ffffffUL)
 
index 6753590..7bd00b9 100644 (file)
 #ifndef _ASM_SCATTERLIST_H
 #define _ASM_SCATTERLIST_H
 
-#include <asm/types.h>
-
-/*
- * Drivers must set either ->address or (preferred) page and ->offset
- * to indicate where data must be transferred to/from.
- *
- * Using page is recommended since it handles highmem data as well as
- * low mem. ->address is restricted to data which has a virtual mapping, and
- * it will go away in the future. Updating to page can be automated very
- * easily -- something like
- *
- * sg->address = some_ptr;
- *
- * can be rewritten as
- *
- * sg_set_page(virt_to_page(some_ptr));
- * sg->offset = (unsigned long) some_ptr & ~PAGE_MASK;
- *
- * and that's it. There's no excuse for not highmem enabling YOUR driver. /jens
- */
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long   sg_magic;
-#endif
-       unsigned long   page_link;
-       unsigned int    offset;         /* for highmem, page offset */
-       dma_addr_t      dma_address;
-       unsigned int    length;
-};
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (0x00ffffff)
 
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->length)
-
 #endif /* _ASM_SCATTERLIST_H */
index 9c4da3d..05a366a 100644 (file)
@@ -98,6 +98,9 @@ config STACKTRACE_SUPPORT
 config NEED_DMA_MAP_STATE
        def_bool y
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config ISA_DMA_API
        bool
 
index 62269b3..2c3b79b 100644 (file)
@@ -3,25 +3,9 @@
 
 #include <asm/page.h>
 #include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       unsigned long page_link;
-       unsigned int offset;
-
-       unsigned int length;
-
-       /* an IOVA can be 64-bits on some PA-Risc platforms. */
-       dma_addr_t iova;        /* I/O Virtual Address */
-       __u32      iova_length; /* bytes mapped */
-};
-
-#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
-#define sg_dma_address(sg) ((sg)->iova)
-#define sg_dma_len(sg)     ((sg)->iova_length)
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (~0UL)
+#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
 
 #endif /* _ASM_PARISC_SCATTERLIST_H */
index c4c4549..66a315e 100644 (file)
@@ -663,6 +663,9 @@ config ZONE_DMA
 config NEED_DMA_MAP_STATE
        def_bool (PPC64 || NOT_COHERENT_CACHE)
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config GENERIC_ISA_DMA
        bool
        depends on PPC64 || POWER4 || 6xx && !CPM2
index 912bf59..34cc78f 100644 (file)
@@ -9,38 +9,12 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifdef __KERNEL__
-#include <linux/types.h>
 #include <asm/dma.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long sg_magic;
-#endif
-       unsigned long page_link;
-       unsigned int offset;
-       unsigned int length;
-
-       /* For TCE or SWIOTLB support */
-       dma_addr_t dma_address;
-       u32 dma_length;
-};
-
-/*
- * These macros should be used after a dma_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)     ((sg)->dma_address)
-#define sg_dma_len(sg)         ((sg)->dma_length)
+#include <asm-generic/scatterlist.h>
 
 #ifdef __powerpc64__
 #define ISA_DMA_THRESHOLD      (~0UL)
 #endif
-
 #define ARCH_HAS_SG_CHAIN
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SCATTERLIST_H */
index 4ff4da2..e7fe218 100644 (file)
@@ -39,8 +39,8 @@ struct dma_map_ops swiotlb_dma_ops = {
        .dma_supported = swiotlb_dma_supported,
        .map_page = swiotlb_map_page,
        .unmap_page = swiotlb_unmap_page,
-       .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
-       .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
+       .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+       .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .mapping_error = swiotlb_dma_mapping_error,
index 6c1df57..8d1de6f 100644 (file)
@@ -127,11 +127,11 @@ static inline void dma_direct_sync_sg(struct device *dev,
                __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
 }
 
-static inline void dma_direct_sync_single_range(struct device *dev,
-               dma_addr_t dma_handle, unsigned long offset, size_t size,
-               enum dma_data_direction direction)
+static inline void dma_direct_sync_single(struct device *dev,
+                                         dma_addr_t dma_handle, size_t size,
+                                         enum dma_data_direction direction)
 {
-       __dma_sync(bus_to_virt(dma_handle+offset), size, direction);
+       __dma_sync(bus_to_virt(dma_handle), size, direction);
 }
 #endif
 
@@ -144,8 +144,8 @@ struct dma_map_ops dma_direct_ops = {
        .map_page       = dma_direct_map_page,
        .unmap_page     = dma_direct_unmap_page,
 #ifdef CONFIG_NOT_COHERENT_CACHE
-       .sync_single_range_for_cpu      = dma_direct_sync_single_range,
-       .sync_single_range_for_device   = dma_direct_sync_single_range,
+       .sync_single_for_cpu            = dma_direct_sync_single,
+       .sync_single_for_device         = dma_direct_sync_single,
        .sync_sg_for_cpu                = dma_direct_sync_sg,
        .sync_sg_for_device             = dma_direct_sync_sg,
 #endif
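
The sync_single_range_for_* hooks disappear here (and from the ia64 swiotlb ops above) because the dma-mapping core now folds the offset into the handle before calling the plain single-sync hook; drivers keep the same API. A sketch of the resulting call path (assuming the common dma-mapping helpers of this series):

	/* driver side, unchanged: */
	dma_sync_single_range_for_cpu(dev, handle, offset, size, DMA_FROM_DEVICE);

	/* core side, now routed as: */
	ops->sync_single_for_cpu(dev, handle + offset, size, DMA_FROM_DEVICE);
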
index 6a1fde0..cd37e49 100644 (file)
@@ -1,6 +1,15 @@
 /*
  * Freescale MPC85xx/MPC86xx RapidIO support
  *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - fixed maintenance access routines, check for aligned access
+ *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write message handling
+ * - Added Machine Check exception handling
+ *
  * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc.
  * Zhang Wei <wei.zhang@freescale.com>
  *
 #include <linux/of_platform.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/kfifo.h>
 
 #include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+
+#undef DEBUG_PW        /* Port-Write debugging */
 
 /* RapidIO definition irq, which read from OF-tree */
 #define IRQ_RIO_BELL(m)                (((struct rio_priv *)(m->priv))->bellirq)
 #define IRQ_RIO_TX(m)          (((struct rio_priv *)(m->priv))->txirq)
 #define IRQ_RIO_RX(m)          (((struct rio_priv *)(m->priv))->rxirq)
+#define IRQ_RIO_PW(m)          (((struct rio_priv *)(m->priv))->pwirq)
 
 #define RIO_ATMU_REGS_OFFSET   0x10c00
 #define RIO_P_MSG_REGS_OFFSET  0x11000
 #define RIO_S_MSG_REGS_OFFSET  0x13000
 #define RIO_ESCSR              0x158
 #define RIO_CCSR               0x15c
+#define RIO_LTLEDCSR           0x0608
+#define  RIO_LTLEDCSR_IER      0x80000000
+#define  RIO_LTLEDCSR_PRT      0x01000000
+#define RIO_LTLEECSR           0x060c
+#define RIO_EPWISR             0x10010
 #define RIO_ISR_AACR           0x10120
 #define RIO_ISR_AACR_AA                0x1     /* Accept All ID */
 #define RIO_MAINT_WIN_SIZE     0x400000
 #define RIO_MSG_ISR_QFI                0x00000010
 #define RIO_MSG_ISR_DIQI       0x00000001
 
+#define RIO_IPWMR_SEN          0x00100000
+#define RIO_IPWMR_QFIE         0x00000100
+#define RIO_IPWMR_EIE          0x00000020
+#define RIO_IPWMR_CQ           0x00000002
+#define RIO_IPWMR_PWE          0x00000001
+
+#define RIO_IPWSR_QF           0x00100000
+#define RIO_IPWSR_TE           0x00000080
+#define RIO_IPWSR_QFI          0x00000010
+#define RIO_IPWSR_PWD          0x00000008
+#define RIO_IPWSR_PWB          0x00000004
+
 #define RIO_MSG_DESC_SIZE      32
 #define RIO_MSG_BUFFER_SIZE    4096
 #define RIO_MIN_TX_RING_SIZE   2
@@ -121,7 +153,7 @@ struct rio_msg_regs {
        u32 pad10[26];
        u32 pwmr;
        u32 pwsr;
-       u32 pad11;
+       u32 epwqbar;
        u32 pwqbar;
 };
 
@@ -160,6 +192,14 @@ struct rio_msg_rx_ring {
        void *dev_id;
 };
 
+struct rio_port_write_msg {
+       void *virt;
+       dma_addr_t phys;
+       u32 msg_count;
+       u32 err_count;
+       u32 discard_count;
+};
+
 struct rio_priv {
        struct device *dev;
        void __iomem *regs_win;
@@ -172,11 +212,64 @@ struct rio_priv {
        struct rio_dbell_ring dbell_ring;
        struct rio_msg_tx_ring msg_tx_ring;
        struct rio_msg_rx_ring msg_rx_ring;
+       struct rio_port_write_msg port_write_msg;
        int bellirq;
        int txirq;
        int rxirq;
+       int pwirq;
+       struct work_struct pw_work;
+       struct kfifo pw_fifo;
+       spinlock_t pw_fifo_lock;
 };
 
+#define __fsl_read_rio_config(x, addr, err, op)                \
+       __asm__ __volatile__(                           \
+               "1:     "op" %1,0(%2)\n"                \
+               "       eieio\n"                        \
+               "2:\n"                                  \
+               ".section .fixup,\"ax\"\n"              \
+               "3:     li %1,-1\n"                     \
+               "       li %0,%3\n"                     \
+               "       b 2b\n"                         \
+               ".section __ex_table,\"a\"\n"           \
+               "       .align 2\n"                     \
+               "       .long 1b,3b\n"                  \
+               ".text"                                 \
+               : "=r" (err), "=r" (x)                  \
+               : "b" (addr), "i" (-EFAULT), "0" (err))
+
+static void __iomem *rio_regs_win;
+
+static int (*saved_mcheck_exception)(struct pt_regs *regs);
+
+static int fsl_rio_mcheck_exception(struct pt_regs *regs)
+{
+       const struct exception_table_entry *entry = NULL;
+       unsigned long reason = (mfspr(SPRN_MCSR) & MCSR_MASK);
+
+       if (reason & MCSR_BUS_RBERR) {
+               reason = in_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR));
+               if (reason & (RIO_LTLEDCSR_IER | RIO_LTLEDCSR_PRT)) {
+                       /* Check if we are prepared to handle this fault */
+                       entry = search_exception_tables(regs->nip);
+                       if (entry) {
+                               pr_debug("RIO: %s - MC Exception handled\n",
+                                        __func__);
+                               out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR),
+                                        0);
+                               regs->msr |= MSR_RI;
+                               regs->nip = entry->fixup;
+                               return 1;
+                       }
+               }
+       }
+
+       if (saved_mcheck_exception)
+               return saved_mcheck_exception(regs);
+       else
+               return cur_cpu_spec->machine_check(regs);
+}
+
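
The macro above brackets a single load with an exception-table entry: if the access triggers a machine check, fsl_rio_mcheck_exception() redirects execution to the .fixup stub, which returns -EFAULT in err and all-ones data. Usage sketch, matching the config-read path below:

	u32 rval;
	u32 err = 0;

	__fsl_read_rio_config(rval, data, err, "lwz");	/* 32-bit load */
	if (err)	/* faulted: rval is -1, err is -EFAULT */
		pr_debug("RIO: maintenance read faulted\n");
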
 /**
  * fsl_rio_doorbell_send - Send a MPC85xx doorbell message
  * @mport: RapidIO master port info
@@ -277,27 +370,44 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid,
 {
        struct rio_priv *priv = mport->priv;
        u8 *data;
+       u32 rval, err = 0;
 
        pr_debug
            ("fsl_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n",
             index, destid, hopcount, offset, len);
+
+       /* 16MB maintenance window possible */
+       /* allow only aligned access to maintenance registers */
+       if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+               return -EINVAL;
+
        out_be32(&priv->maint_atmu_regs->rowtar,
-                (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9));
+                (destid << 22) | (hopcount << 12) | (offset >> 12));
+       out_be32(&priv->maint_atmu_regs->rowtear,  (destid >> 10));
 
-       data = (u8 *) priv->maint_win + offset;
+       data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
        switch (len) {
        case 1:
-               *val = in_8((u8 *) data);
+               __fsl_read_rio_config(rval, data, err, "lbz");
                break;
        case 2:
-               *val = in_be16((u16 *) data);
+               __fsl_read_rio_config(rval, data, err, "lhz");
                break;
-       default:
-               *val = in_be32((u32 *) data);
+       case 4:
+               __fsl_read_rio_config(rval, data, err, "lwz");
                break;
+       default:
+               return -EINVAL;
        }
 
-       return 0;
+       if (err) {
+               pr_debug("RIO: cfg_read error %d for %x:%x:%x\n",
+                        err, destid, hopcount, offset);
+       }
+
+       *val = rval;
+
+       return err;
 }
 
 /**
@@ -322,10 +432,17 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid,
        pr_debug
            ("fsl_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n",
             index, destid, hopcount, offset, len, val);
+
+       /* 16MB maintenance windows possible */
+       /* allow only aligned access to maintenance registers */
+       if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len))
+               return -EINVAL;
+
        out_be32(&priv->maint_atmu_regs->rowtar,
-                (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9));
+                (destid << 22) | (hopcount << 12) | (offset >> 12));
+       out_be32(&priv->maint_atmu_regs->rowtear,  (destid >> 10));
 
-       data = (u8 *) priv->maint_win + offset;
+       data = (u8 *) priv->maint_win + (offset & (RIO_MAINT_WIN_SIZE - 1));
        switch (len) {
        case 1:
                out_8((u8 *) data, val);
@@ -333,9 +450,11 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid,
        case 2:
                out_be16((u16 *) data, val);
                break;
-       default:
+       case 4:
                out_be32((u32 *) data, val);
                break;
+       default:
+               return -EINVAL;
        }
 
        return 0;
@@ -930,6 +1049,223 @@ static int fsl_rio_doorbell_init(struct rio_mport *mport)
        return rc;
 }
 
+/**
+ * fsl_rio_port_write_handler - MPC85xx port write interrupt handler
+ * @irq: Linux interrupt number
+ * @dev_instance: Pointer to interrupt-specific data
+ *
+ * Handles port write interrupts. Parses a list of registered
+ * port write event handlers and executes a matching event handler.
+ */
+static irqreturn_t
+fsl_rio_port_write_handler(int irq, void *dev_instance)
+{
+       u32 ipwmr, ipwsr;
+       struct rio_mport *port = (struct rio_mport *)dev_instance;
+       struct rio_priv *priv = port->priv;
+       u32 epwisr, tmp;
+
+       ipwmr = in_be32(&priv->msg_regs->pwmr);
+       ipwsr = in_be32(&priv->msg_regs->pwsr);
+
+       epwisr = in_be32(priv->regs_win + RIO_EPWISR);
+       if (epwisr & 0x80000000) {
+               tmp = in_be32(priv->regs_win + RIO_LTLEDCSR);
+               pr_info("RIO_LTLEDCSR = 0x%x\n", tmp);
+               out_be32(priv->regs_win + RIO_LTLEDCSR, 0);
+       }
+
+       if (!(epwisr & 0x00000001))
+               return IRQ_HANDLED;
+
+#ifdef DEBUG_PW
+       pr_debug("PW Int->IPWMR: 0x%08x IPWSR: 0x%08x (", ipwmr, ipwsr);
+       if (ipwsr & RIO_IPWSR_QF)
+               pr_debug(" QF");
+       if (ipwsr & RIO_IPWSR_TE)
+               pr_debug(" TE");
+       if (ipwsr & RIO_IPWSR_QFI)
+               pr_debug(" QFI");
+       if (ipwsr & RIO_IPWSR_PWD)
+               pr_debug(" PWD");
+       if (ipwsr & RIO_IPWSR_PWB)
+               pr_debug(" PWB");
+       pr_debug(" )\n");
+#endif
+       out_be32(&priv->msg_regs->pwsr,
+                ipwsr & (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
+
+       if ((ipwmr & RIO_IPWMR_EIE) && (ipwsr & RIO_IPWSR_TE)) {
+               priv->port_write_msg.err_count++;
+               pr_info("RIO: Port-Write Transaction Err (%d)\n",
+                        priv->port_write_msg.err_count);
+       }
+       if (ipwsr & RIO_IPWSR_PWD) {
+               priv->port_write_msg.discard_count++;
+               pr_info("RIO: Port Discarded Port-Write Msg(s) (%d)\n",
+                        priv->port_write_msg.discard_count);
+       }
+
+       /* Schedule deferred processing if PW was received */
+       if (ipwsr & RIO_IPWSR_QFI) {
+               /* Save PW message (if there is room in FIFO),
+                * otherwise discard it.
+                */
+               if (kfifo_avail(&priv->pw_fifo) >= RIO_PW_MSG_SIZE) {
+                       priv->port_write_msg.msg_count++;
+                       kfifo_in(&priv->pw_fifo, priv->port_write_msg.virt,
+                                RIO_PW_MSG_SIZE);
+               } else {
+                       priv->port_write_msg.discard_count++;
+                       pr_info("RIO: ISR Discarded Port-Write Msg(s) (%d)\n",
+                                priv->port_write_msg.discard_count);
+               }
+               schedule_work(&priv->pw_work);
+       }
+
+       /* Issue Clear Queue command. This allows another
+        * port-write to be received.
+        */
+       out_be32(&priv->msg_regs->pwmr, ipwmr | RIO_IPWMR_CQ);
+
+       return IRQ_HANDLED;
+}
+
+static void fsl_pw_dpc(struct work_struct *work)
+{
+       struct rio_priv *priv = container_of(work, struct rio_priv, pw_work);
+       unsigned long flags;
+       u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)];
+
+       /*
+        * Process port-write messages
+        */
+       spin_lock_irqsave(&priv->pw_fifo_lock, flags);
+       while (kfifo_out(&priv->pw_fifo, (unsigned char *)msg_buffer,
+                        RIO_PW_MSG_SIZE)) {
+               /* Process one message */
+               spin_unlock_irqrestore(&priv->pw_fifo_lock, flags);
+#ifdef DEBUG_PW
+               {
+               u32 i;
+               pr_debug("%s : Port-Write Message:", __func__);
+               for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); i++) {
+                       if ((i%4) == 0)
+                               pr_debug("\n0x%02x: 0x%08x", i*4,
+                                        msg_buffer[i]);
+                       else
+                               pr_debug(" 0x%08x", msg_buffer[i]);
+               }
+               pr_debug("\n");
+               }
+#endif
+               /* Pass the port-write message to RIO core for processing */
+               rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+               spin_lock_irqsave(&priv->pw_fifo_lock, flags);
+       }
+       spin_unlock_irqrestore(&priv->pw_fifo_lock, flags);
+}
+
+/**
+ * fsl_rio_pw_enable - enable/disable the port-write interface
+ * @mport: Master port implementing the port write unit
+ * @enable:    1=enable; 0=disable port-write message handling
+ */
+static int fsl_rio_pw_enable(struct rio_mport *mport, int enable)
+{
+       struct rio_priv *priv = mport->priv;
+       u32 rval;
+
+       rval = in_be32(&priv->msg_regs->pwmr);
+
+       if (enable)
+               rval |= RIO_IPWMR_PWE;
+       else
+               rval &= ~RIO_IPWMR_PWE;
+
+       out_be32(&priv->msg_regs->pwmr, rval);
+
+       return 0;
+}
+
+/**
+ * fsl_rio_port_write_init - MPC85xx port write interface init
+ * @mport: Master port implementing the port write unit
+ *
+ * Initializes port write unit hardware and DMA buffer
+ * ring. Called from fsl_rio_setup(). Returns %0 on success
+ * or %-ENOMEM on failure.
+ */
+static int fsl_rio_port_write_init(struct rio_mport *mport)
+{
+       struct rio_priv *priv = mport->priv;
+       int rc = 0;
+
+	/* The following configuration requires a disabled port-write controller */
+       out_be32(&priv->msg_regs->pwmr,
+                in_be32(&priv->msg_regs->pwmr) & ~RIO_IPWMR_PWE);
+
+       /* Initialize port write */
+       priv->port_write_msg.virt = dma_alloc_coherent(priv->dev,
+                                       RIO_PW_MSG_SIZE,
+                                       &priv->port_write_msg.phys, GFP_KERNEL);
+       if (!priv->port_write_msg.virt) {
+		pr_err("RIO: unable to allocate port write queue\n");
+               return -ENOMEM;
+       }
+
+       priv->port_write_msg.err_count = 0;
+       priv->port_write_msg.discard_count = 0;
+
+       /* Point dequeue/enqueue pointers at first entry */
+       out_be32(&priv->msg_regs->epwqbar, 0);
+       out_be32(&priv->msg_regs->pwqbar, (u32) priv->port_write_msg.phys);
+
+       pr_debug("EIPWQBAR: 0x%08x IPWQBAR: 0x%08x\n",
+                in_be32(&priv->msg_regs->epwqbar),
+                in_be32(&priv->msg_regs->pwqbar));
+
+       /* Clear interrupt status IPWSR */
+       out_be32(&priv->msg_regs->pwsr,
+                (RIO_IPWSR_TE | RIO_IPWSR_QFI | RIO_IPWSR_PWD));
+
+       /* Configure port write controller: enable snooping and all
+          reporting, clear queue full */
+       out_be32(&priv->msg_regs->pwmr,
+                RIO_IPWMR_SEN | RIO_IPWMR_QFIE | RIO_IPWMR_EIE | RIO_IPWMR_CQ);
+
+
+       /* Hook up port-write handler */
+       rc = request_irq(IRQ_RIO_PW(mport), fsl_rio_port_write_handler, 0,
+                        "port-write", (void *)mport);
+       if (rc < 0) {
+               pr_err("MPC85xx RIO: unable to request inbound doorbell irq");
+               goto err_out;
+       }
+
+       INIT_WORK(&priv->pw_work, fsl_pw_dpc);
+       spin_lock_init(&priv->pw_fifo_lock);
+       if (kfifo_alloc(&priv->pw_fifo, RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+               pr_err("FIFO allocation failed\n");
+               rc = -ENOMEM;
+               goto err_out_irq;
+       }
+
+       pr_debug("IPWMR: 0x%08x IPWSR: 0x%08x\n",
+                in_be32(&priv->msg_regs->pwmr),
+                in_be32(&priv->msg_regs->pwsr));
+
+       return rc;
+
+err_out_irq:
+       free_irq(IRQ_RIO_PW(mport), (void *)mport);
+err_out:
+       dma_free_coherent(priv->dev, RIO_PW_MSG_SIZE,
+                         priv->port_write_msg.virt,
+                         priv->port_write_msg.phys);
+       return rc;
+}
+
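Drivers consume these queued port-writes through the RIO core. A hedged
sketch, assuming the rio_request_inb_pwrite() interface and the
union rio_pw_msg layout added elsewhere in this series:

	/* sketch: a RapidIO device driver registering a port-write callback */
	static int example_pw_cb(struct rio_dev *rdev, union rio_pw_msg *msg,
				 int step)
	{
		/* comptag field name assumed from the rio_pw_msg union */
		dev_dbg(&rdev->dev, "port-write, comptag 0x%08x\n",
			msg->em.comptag);
		return 0;
	}

	/* in the driver's probe routine: */
	if (rio_request_inb_pwrite(rdev, example_pw_cb))
		dev_warn(&rdev->dev, "failed to register port-write handler\n");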
 static char *cmdline = NULL;
 
 static int fsl_rio_get_hdid(int index)
@@ -1057,7 +1393,7 @@ int fsl_rio_setup(struct of_device *dev)
        dev_info(&dev->dev, "LAW start 0x%016llx, size 0x%016llx.\n",
                        law_start, law_size);
 
-       ops = kmalloc(sizeof(struct rio_ops), GFP_KERNEL);
+       ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
        if (!ops) {
                rc = -ENOMEM;
                goto err_ops;
@@ -1067,6 +1403,7 @@ int fsl_rio_setup(struct of_device *dev)
        ops->cread = fsl_rio_config_read;
        ops->cwrite = fsl_rio_config_write;
        ops->dsend = fsl_rio_doorbell_send;
+       ops->pwenable = fsl_rio_pw_enable;
 
        port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
        if (!port) {
@@ -1089,11 +1426,12 @@ int fsl_rio_setup(struct of_device *dev)
        port->iores.flags = IORESOURCE_MEM;
        port->iores.name = "rio_io_win";
 
+       priv->pwirq   = irq_of_parse_and_map(dev->dev.of_node, 0);
        priv->bellirq = irq_of_parse_and_map(dev->dev.of_node, 2);
        priv->txirq = irq_of_parse_and_map(dev->dev.of_node, 3);
        priv->rxirq = irq_of_parse_and_map(dev->dev.of_node, 4);
-       dev_info(&dev->dev, "bellirq: %d, txirq: %d, rxirq %d\n", priv->bellirq,
-                               priv->txirq, priv->rxirq);
+       dev_info(&dev->dev, "pwirq: %d, bellirq: %d, txirq: %d, rxirq %d\n",
+                priv->pwirq, priv->bellirq, priv->txirq, priv->rxirq);
 
        rio_init_dbell_res(&port->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
        rio_init_mbox_res(&port->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
@@ -1109,6 +1447,7 @@ int fsl_rio_setup(struct of_device *dev)
        rio_register_mport(port);
 
        priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1);
+       rio_regs_win = priv->regs_win;
 
        /* Probe the master port phy type */
        ccsr = in_be32(priv->regs_win + RIO_CCSR);
@@ -1166,7 +1505,8 @@ int fsl_rio_setup(struct of_device *dev)
 
        /* Configure maintenance transaction window */
        out_be32(&priv->maint_atmu_regs->rowbar, law_start >> 12);
-       out_be32(&priv->maint_atmu_regs->rowar, 0x80077015);    /* 4M */
+       out_be32(&priv->maint_atmu_regs->rowar,
+                0x80077000 | (ilog2(RIO_MAINT_WIN_SIZE) - 1));
 
        priv->maint_win = ioremap(law_start, RIO_MAINT_WIN_SIZE);
 
@@ -1175,6 +1515,12 @@ int fsl_rio_setup(struct of_device *dev)
                        (law_start + RIO_MAINT_WIN_SIZE) >> 12);
        out_be32(&priv->dbell_atmu_regs->rowar, 0x8004200b);    /* 4k */
        fsl_rio_doorbell_init(port);
+       fsl_rio_port_write_init(port);
+
+       saved_mcheck_exception = ppc_md.machine_check_exception;
+       ppc_md.machine_check_exception = fsl_rio_mcheck_exception;
+       /* Ensure that RFXE is set */
+       mtspr(SPRN_HID1, (mfspr(SPRN_HID1) | 0x20000));
 
        return 0;
 err:
index 35d786f..be44d94 100644 (file)
@@ -1 +1,3 @@
+#define ISA_DMA_THRESHOLD      (~0UL)
+
 #include <asm-generic/scatterlist.h>
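This and the similar one-line conversions below rely on the consolidated
generic header; each arch header now only supplies ISA_DMA_THRESHOLD (and
optionally ARCH_HAS_SG_CHAIN). The assumed shape of
asm-generic/scatterlist.h in this window, which also explains the new
NEED_SG_DMA_LENGTH Kconfig entries:

	struct scatterlist {
	#ifdef CONFIG_DEBUG_SG
		unsigned long	sg_magic;
	#endif
		unsigned long	page_link;
		unsigned int	offset;
		unsigned int	length;
		dma_addr_t	dma_address;
	#ifdef CONFIG_NEED_SG_DMA_LENGTH
		unsigned int	dma_length;
	#endif
	};

	#define sg_dma_address(sg)	((sg)->dma_address)
	#ifdef CONFIG_NEED_SG_DMA_LENGTH
	#define sg_dma_len(sg)		((sg)->dma_length)
	#else
	#define sg_dma_len(sg)		((sg)->length)
	#endif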
index e4d98de..541053e 100644 (file)
@@ -944,21 +944,21 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
        struct cpu *c = &per_cpu(cpu_devices, cpu);
        struct sys_device *s = &c->sysdev;
        struct s390_idle_data *idle;
+       int err = 0;
 
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                idle = &per_cpu(s390_idle, cpu);
                memset(idle, 0, sizeof(struct s390_idle_data));
-               if (sysfs_create_group(&s->kobj, &cpu_online_attr_group))
-                       return NOTIFY_BAD;
+               err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
                break;
        }
-       return NOTIFY_OK;
+       return notifier_from_errno(err);
 }
 
 static struct notifier_block __cpuinitdata smp_cpu_nb = {
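The NOTIFY_BAD -> notifier_from_errno() conversion recurs throughout this
merge; the helper encodes the errno into the notifier return value so the
caller can recover it with notifier_to_errno(). Roughly, per
include/linux/notifier.h:

	static inline int notifier_from_errno(int err)
	{
		if (err)
			return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
		return NOTIFY_OK;
	}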
index 9f533b8..4fa1a66 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_SCORE_SCATTERLIST_H
 #define _ASM_SCORE_SCATTERLIST_H
 
+#define ISA_DMA_THRESHOLD      (~0UL)
+
 #include <asm-generic/scatterlist.h>
 
 #endif /* _ASM_SCORE_SCATTERLIST_H */
index 0e318c9..c5ee4ce 100644 (file)
@@ -186,6 +186,9 @@ config DMA_NONCOHERENT
 config NEED_DMA_MAP_STATE
        def_bool DMA_NONCOHERENT
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
index d4104ce..6c4bbba 100644 (file)
@@ -436,29 +436,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             0, sizeof(struct pt_dspregs),
                                             (const void __user *)data);
 #endif
-#ifdef CONFIG_BINFMT_ELF_FDPIC
-       case PTRACE_GETFDPIC: {
-               unsigned long tmp = 0;
-
-               switch (addr) {
-               case PTRACE_GETFDPIC_EXEC:
-                       tmp = child->mm->context.exec_fdpic_loadmap;
-                       break;
-               case PTRACE_GETFDPIC_INTERP:
-                       tmp = child->mm->context.interp_fdpic_loadmap;
-                       break;
-               default:
-                       break;
-               }
-
-               ret = 0;
-               if (put_user(tmp, datap)) {
-                       ret = -EFAULT;
-                       break;
-               }
-               break;
-       }
-#endif
        default:
                ret = ptrace_request(child, request, addr, data);
                break;
index d6781ce..6f1470b 100644 (file)
@@ -133,6 +133,9 @@ config ZONE_DMA
 config NEED_DMA_MAP_STATE
        def_bool y
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config GENERIC_ISA_DMA
        bool
        default y if SPARC32
index d112025..433e45f 100644 (file)
@@ -1,8 +1,9 @@
 #ifndef _SPARC_SCATTERLIST_H
 #define _SPARC_SCATTERLIST_H
 
-#define sg_dma_len(sg)         ((sg)->dma_length)
-
 #include <asm-generic/scatterlist.h>
 
+#define ISA_DMA_THRESHOLD      (~0UL)
+#define ARCH_HAS_SG_CHAIN
+
 #endif /* !(_SPARC_SCATTERLIST_H) */
index e0c619c..dcb0593 100644 (file)
@@ -109,6 +109,9 @@ config SBUS
 config NEED_DMA_MAP_STATE
        def_bool (X86_64 || DMAR || DMA_API_DEBUG)
 
+config NEED_SG_DMA_LENGTH
+       def_bool y
+
 config GENERIC_ISA_DMA
        def_bool y
 
@@ -1703,6 +1706,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
        def_bool X86_64
        depends on NUMA
 
+config USE_PERCPU_NUMA_NODE_ID
+       def_bool X86_64
+       depends on NUMA
+
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
index 75af592..fb0b187 100644 (file)
@@ -1,8 +1,9 @@
 #ifndef _ASM_X86_SCATTERLIST_H
 #define _ASM_X86_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
 #include <asm-generic/scatterlist.h>
 
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+#define ARCH_HAS_SG_CHAIN
+
 #endif /* _ASM_X86_SCATTERLIST_H */
index c5087d7..21899cc 100644 (file)
 extern int cpu_to_node_map[];
 
 /* Returns the number of the node containing CPU 'cpu' */
-static inline int cpu_to_node(int cpu)
+static inline int __cpu_to_node(int cpu)
 {
        return cpu_to_node_map[cpu];
 }
-#define early_cpu_to_node(cpu) cpu_to_node(cpu)
+#define early_cpu_to_node __cpu_to_node
+#define cpu_to_node __cpu_to_node
 
 #else /* CONFIG_X86_64 */
 
 /* Mappings between logical cpu number and node number */
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
-/* Returns the number of the current Node. */
-DECLARE_PER_CPU(int, node_number);
-#define numa_node_id()         percpu_read(node_number)
-
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-extern int cpu_to_node(int cpu);
+/*
+ * override generic percpu implementation of cpu_to_node
+ */
+extern int __cpu_to_node(int cpu);
+#define cpu_to_node __cpu_to_node
+
 extern int early_cpu_to_node(int cpu);
 
 #else  /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int cpu_to_node(int cpu)
-{
-       return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
 /* Same function but used if called before per_cpu areas are setup */
 static inline int early_cpu_to_node(int cpu)
 {
@@ -170,6 +166,10 @@ static inline int numa_node_id(void)
 {
        return 0;
 }
+/*
+ * indicate override:
+ */
+#define numa_node_id numa_node_id
 
 static inline int early_cpu_to_node(int cpu)
 {
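For orientation, the generic per-cpu fallbacks that these x86 overrides
replace look roughly like this (assumed shape of the new generic
<linux/topology.h> code, not a verbatim copy):

	DECLARE_PER_CPU(int, numa_node);

	#ifndef numa_node_id
	static inline int numa_node_id(void)
	{
		return __this_cpu_read(numa_node);
	}
	#endif

	#ifndef cpu_to_node
	static inline int cpu_to_node(int cpu)
	{
		return per_cpu(numa_node, cpu);
	}
	#endif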
index cc83a00..68e4a6f 100644 (file)
@@ -1121,9 +1121,9 @@ void __cpuinit cpu_init(void)
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-       if (cpu != 0 && percpu_read(node_number) == 0 &&
-           cpu_to_node(cpu) != NUMA_NO_NODE)
-               percpu_write(node_number, cpu_to_node(cpu));
+       if (cpu != 0 && percpu_read(numa_node) == 0 &&
+           early_cpu_to_node(cpu) != NUMA_NO_NODE)
+               set_numa_node(early_cpu_to_node(cpu));
 #endif
 
        me = current;
index 81c499e..e1a0a3b 100644 (file)
@@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
                mutex_unlock(&therm_cpu_lock);
                break;
        }
-       return err ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(err);
 }
 
 static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
index 8b862d5..1b7b31a 100644 (file)
@@ -170,7 +170,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
                cpuid_device_destroy(cpu);
                break;
        }
-       return err ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(err);
 }
 
 static struct notifier_block __refdata cpuid_class_cpu_notifier =
index 4d4468e..7bf2dc4 100644 (file)
@@ -230,7 +230,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
                msr_device_destroy(cpu);
                break;
        }
-       return err ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(err);
 }
 
 static struct notifier_block __refdata msr_class_cpu_notifier = {
index 7d2829d..a5bc528 100644 (file)
@@ -31,8 +31,6 @@ static struct dma_map_ops swiotlb_dma_ops = {
        .free_coherent = swiotlb_free_coherent,
        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
-       .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
-       .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .map_sg = swiotlb_map_sg_attrs,
index ef6370b..a867940 100644 (file)
@@ -265,10 +265,10 @@ void __init setup_per_cpu_areas(void)
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
        /*
-        * make sure boot cpu node_number is right, when boot cpu is on the
+        * make sure boot cpu numa_node is right, when boot cpu is on the
         * node that doesn't have mem installed
         */
-       per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+       set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
 #endif
 
        /* Setup node to cpumask map */
index 550df48..10c27bb 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/topology.h>
 #include <linux/module.h>
 #include <linux/bootmem.h>
+#include <linux/random.h>
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 # define DBG(x...) printk(KERN_DEBUG x)
@@ -65,3 +66,19 @@ const struct cpumask *cpumask_of_node(int node)
 }
 EXPORT_SYMBOL(cpumask_of_node);
 #endif
+
+/*
+ * Return the bit number of a random bit set in the nodemask.
+ *   (returns -1 if nodemask is empty)
+ */
+int __node_random(const nodemask_t *maskp)
+{
+       int w, bit = -1;
+
+       w = nodes_weight(*maskp);
+       if (w)
+               bit = bitmap_ord_to_pos(maskp->bits,
+                       get_random_int() % w, MAX_NUMNODES);
+       return bit;
+}
+EXPORT_SYMBOL(__node_random);
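A usage sketch for the new helper; the fallback policy is an assumption,
not from this patch:

	/* pick a random node that currently has memory */
	int nid = __node_random(&node_states[N_HIGH_MEMORY]);
	if (nid < 0)			/* empty nodemask */
		nid = numa_node_id();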
index 8948f47..a7bcc23 100644 (file)
@@ -33,9 +33,6 @@ int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
-DEFINE_PER_CPU(int, node_number) = 0;
-EXPORT_PER_CPU_SYMBOL(node_number);
-
 /*
  * Map cpu index to node index
  */
@@ -809,7 +806,7 @@ void __cpuinit numa_set_node(int cpu, int node)
        per_cpu(x86_cpu_to_node_map, cpu) = node;
 
        if (node != NUMA_NO_NODE)
-               per_cpu(node_number, cpu) = node;
+               set_cpu_numa_node(cpu, node);
 }
 
 void __cpuinit numa_clear_node(int cpu)
@@ -867,7 +864,7 @@ void __cpuinit numa_remove_cpu(int cpu)
        numa_set_cpumask(cpu, 0);
 }
 
-int cpu_to_node(int cpu)
+int __cpu_to_node(int cpu)
 {
        if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
                printk(KERN_WARNING
@@ -877,7 +874,7 @@ int cpu_to_node(int cpu)
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
 }
-EXPORT_SYMBOL(cpu_to_node);
+EXPORT_SYMBOL(__cpu_to_node);
 
 /*
  * Same function as cpu_to_node() but used if called before the
index 810080b..b1f9fdc 100644 (file)
 #ifndef _XTENSA_SCATTERLIST_H
 #define _XTENSA_SCATTERLIST_H
 
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
-       unsigned long   sg_magic;
-#endif
-       unsigned long   page_link;
-       unsigned int    offset;
-       dma_addr_t      dma_address;
-       unsigned int    length;
-};
-
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns, or alternatively stop on the first sg_dma_len(sg) which
- * is 0.
- */
-#define sg_dma_address(sg)      ((sg)->dma_address)
-#define sg_dma_len(sg)          ((sg)->length)
-
+#include <asm-generic/scatterlist.h>
 
 #define ISA_DMA_THRESHOLD (~0UL)
 
index bf6b132..9fc630c 100644 (file)
@@ -162,7 +162,7 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
                topology_remove_dev(cpu);
                break;
        }
-       return rc ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(rc);
 }
 
 static int __cpuinit topology_sysfs_init(void)
index e21175b..f09fc0e 100644 (file)
@@ -1121,5 +1121,12 @@ config DEVPORT
 
 source "drivers/s390/char/Kconfig"
 
+config RAMOOPS
+       tristate "Log panic/oops to a RAM buffer"
+       default n
+       help
+         This enables panic and oops messages to be logged to a circular
+         buffer in RAM, where they can be read back at some later point.
+
 endmenu
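In practice the reserved RAM region is handed to the driver at load time
via module parameters; the names here are an assumption for illustration,
e.g. "modprobe ramoops mem_address=0x8000000 mem_size=0x10000".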
 
index d39be4c..88d6eac 100644 (file)
@@ -108,6 +108,7 @@ obj-$(CONFIG_HANGCHECK_TIMER)       += hangcheck-timer.o
 obj-$(CONFIG_TCG_TPM)          += tpm/
 
 obj-$(CONFIG_PS3_FLASH)                += ps3flash.o
+obj-$(CONFIG_RAMOOPS)          += ramoops.o
 
 obj-$(CONFIG_JS_RTC)           += js-rtc.o
 js-rtc-y = rtc.o
index 67ea3a6..70312da 100644 (file)
@@ -384,7 +384,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
 {
        u32 httfea,baseaddr,enuscr;
        struct pci_dev *dev1;
-       int i;
+       int i, ret;
        unsigned size = amd64_fetch_size();
 
        dev_info(&pdev->dev, "setting up ULi AGP\n");
@@ -400,15 +400,18 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
 
        if (i == ARRAY_SIZE(uli_sizes)) {
                dev_info(&pdev->dev, "no ULi size found for %d\n", size);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto put;
        }
 
        /* shadow x86-64 registers into ULi registers */
        pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea);
 
        /* if x86-64 aperture base is beyond 4G, exit here */
-       if ((httfea & 0x7fff) >> (32 - 25))
-               return -ENODEV;
+       if ((httfea & 0x7fff) >> (32 - 25)) {
+               ret = -ENODEV;
+               goto put;
+       }
 
        httfea = (httfea& 0x7fff) << 25;
 
@@ -420,9 +423,10 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
        enuscr= httfea+ (size * 1024 * 1024) - 1;
        pci_write_config_dword(dev1, ULI_X86_64_HTT_FEA_REG, httfea);
        pci_write_config_dword(dev1, ULI_X86_64_ENU_SCR_REG, enuscr);
-
+       ret = 0;
+put:
        pci_dev_put(dev1);
-       return 0;
+       return ret;
 }
 
 
@@ -441,7 +445,7 @@ static int nforce3_agp_init(struct pci_dev *pdev)
 {
        u32 tmp, apbase, apbar, aplimit;
        struct pci_dev *dev1;
-       int i;
+       int i, ret;
        unsigned size = amd64_fetch_size();
 
        dev_info(&pdev->dev, "setting up Nforce3 AGP\n");
@@ -458,7 +462,8 @@ static int nforce3_agp_init(struct pci_dev *pdev)
 
        if (i == ARRAY_SIZE(nforce3_sizes)) {
                dev_info(&pdev->dev, "no NForce3 size found for %d\n", size);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto put;
        }
 
        pci_read_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, &tmp);
@@ -472,7 +477,8 @@ static int nforce3_agp_init(struct pci_dev *pdev)
        /* if x86-64 aperture base is beyond 4G, exit here */
        if ( (apbase & 0x7fff) >> (32 - 25) ) {
                dev_info(&pdev->dev, "aperture base > 4G\n");
-               return -ENODEV;
+               ret = -ENODEV;
+               goto put;
        }
 
        apbase = (apbase & 0x7fff) << 25;
@@ -488,9 +494,11 @@ static int nforce3_agp_init(struct pci_dev *pdev)
        pci_write_config_dword(dev1, NVIDIA_X86_64_1_APBASE2, apbase);
        pci_write_config_dword(dev1, NVIDIA_X86_64_1_APLIMIT2, aplimit);
 
+       ret = 0;
+put:
        pci_dev_put(dev1);
 
-       return 0;
+       return ret;
 }
 
 static int __devinit agp_amd64_probe(struct pci_dev *pdev,
index 56b2767..4f8d60c 100644 (file)
@@ -84,6 +84,7 @@ static char *serial_version = "4.30";
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/bitops.h>
+#include <linux/platform_device.h>
 
 #include <asm/setup.h>
 
@@ -1954,29 +1955,16 @@ static const struct tty_operations serial_ops = {
 /*
  * The serial driver boot-time initialization code!
  */
-static int __init rs_init(void)
+static int __init amiga_serial_probe(struct platform_device *pdev)
 {
        unsigned long flags;
        struct serial_state * state;
        int error;
 
-       if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(AMI_SERIAL))
-               return -ENODEV;
-
        serial_driver = alloc_tty_driver(1);
        if (!serial_driver)
                return -ENOMEM;
 
-       /*
-        *  We request SERDAT and SERPER only, because the serial registers are
-        *  too spreaded over the custom register space
-        */
-       if (!request_mem_region(CUSTOM_PHYSADDR+0x30, 4,
-                               "amiserial [Paula]")) {
-               error = -EBUSY;
-               goto fail_put_tty_driver;
-       }
-
        IRQ_ports = NULL;
 
        show_serial_version();
@@ -1998,7 +1986,7 @@ static int __init rs_init(void)
 
        error = tty_register_driver(serial_driver);
        if (error)
-               goto fail_release_mem_region;
+               goto fail_put_tty_driver;
 
        state = rs_table;
        state->magic = SSTATE_MAGIC;
@@ -2050,23 +2038,24 @@ static int __init rs_init(void)
        ciab.ddra |= (SER_DTR | SER_RTS);   /* outputs */
        ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR);  /* inputs */
 
+       platform_set_drvdata(pdev, state);
+
        return 0;
 
 fail_free_irq:
        free_irq(IRQ_AMIGA_TBE, state);
 fail_unregister:
        tty_unregister_driver(serial_driver);
-fail_release_mem_region:
-       release_mem_region(CUSTOM_PHYSADDR+0x30, 4);
 fail_put_tty_driver:
        put_tty_driver(serial_driver);
        return error;
 }
 
-static __exit void rs_exit(void) 
+static int __exit amiga_serial_remove(struct platform_device *pdev)
 {
        int error;
-       struct async_struct *info = rs_table[0].info;
+       struct serial_state *state = platform_get_drvdata(pdev);
+       struct async_struct *info = state->info;
 
        /* printk("Unloading %s: version %s\n", serial_name, serial_version); */
        tasklet_kill(&info->tlet);
@@ -2075,19 +2064,38 @@ static __exit void rs_exit(void)
                       error);
        put_tty_driver(serial_driver);
 
-       if (info) {
-         rs_table[0].info = NULL;
-         kfree(info);
-       }
+       rs_table[0].info = NULL;
+       kfree(info);
 
        free_irq(IRQ_AMIGA_TBE, rs_table);
        free_irq(IRQ_AMIGA_RBF, rs_table);
 
-       release_mem_region(CUSTOM_PHYSADDR+0x30, 4);
+       platform_set_drvdata(pdev, NULL);
+
+       return error;
+}
+
+static struct platform_driver amiga_serial_driver = {
+       .remove = __exit_p(amiga_serial_remove),
+       .driver   = {
+               .name   = "amiga-serial",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init amiga_serial_init(void)
+{
+       return platform_driver_probe(&amiga_serial_driver, amiga_serial_probe);
+}
+
+module_init(amiga_serial_init);
+
+static void __exit amiga_serial_exit(void)
+{
+       platform_driver_unregister(&amiga_serial_driver);
 }
 
-module_init(rs_init)
-module_exit(rs_exit)
+module_exit(amiga_serial_exit);
 
 
 #if defined(CONFIG_SERIAL_CONSOLE) && !defined(MODULE)
@@ -2154,3 +2162,4 @@ console_initcall(amiserial_console_init);
 #endif /* CONFIG_SERIAL_CONSOLE && !MODULE */
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:amiga-serial");
index 63313a3..f4ae0e0 100644 (file)
@@ -703,14 +703,9 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        /* In general, the device is only openable by root anyway, so we're not
           particularly concerned that bogus ioctls can flood the console. */
 
-       adgl = kmalloc(sizeof(struct st_ram_io), GFP_KERNEL);
-       if (!adgl)
-               return -ENOMEM;
-
-       if (copy_from_user(adgl, argp, sizeof(struct st_ram_io))) {
-               kfree(adgl);
-               return -EFAULT;
-       }
+       adgl = memdup_user(argp, sizeof(struct st_ram_io));
+       if (IS_ERR(adgl))
+               return PTR_ERR(adgl);
 
        lock_kernel();  
        IndexCard = adgl->num_card-1;
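memdup_user() folds the kmalloc + copy_from_user + unwind pattern removed
above into one call; its shape is roughly (per mm/util.c):

	void *memdup_user(const void __user *src, size_t len)
	{
		void *p = kmalloc(len, GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);
		if (copy_from_user(p, src, len)) {
			kfree(p);
			return ERR_PTR(-EFAULT);
		}
		return p;
	}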
index c6ad423..4f3f8c9 100644 (file)
@@ -2505,12 +2505,11 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum,
                        return rv;
                }
 
-               printk(KERN_INFO
-                      "ipmi: Found new BMC (man_id: 0x%6.6x, "
-                      " prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n",
-                      bmc->id.manufacturer_id,
-                      bmc->id.product_id,
-                      bmc->id.device_id);
+               dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, "
+                        "prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n",
+                        bmc->id.manufacturer_id,
+                        bmc->id.product_id,
+                        bmc->id.device_id);
        }
 
        /*
@@ -4037,8 +4036,8 @@ static void ipmi_request_event(void)
 
 static struct timer_list ipmi_timer;
 
-/* Call every ~100 ms. */
-#define IPMI_TIMEOUT_TIME      100
+/* Call every ~1000 ms. */
+#define IPMI_TIMEOUT_TIME      1000
 
 /* How many jiffies does it take to get to the timeout time. */
 #define IPMI_TIMEOUT_JIFFIES   ((IPMI_TIMEOUT_TIME * HZ) / 1000)
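Worked through: IPMI_TIMEOUT_JIFFIES = (IPMI_TIMEOUT_TIME * HZ) / 1000, so
with HZ=250 the old 100 ms period fired the timer every 25 jiffies, while
the new 1000 ms period fires it every 250 jiffies (once a second), a
tenfold reduction in idle timer wakeups.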
index 47ffe4a..35603dd 100644 (file)
@@ -107,6 +107,14 @@ enum si_type {
 };
 static char *si_to_str[] = { "kcs", "smic", "bt" };
 
+enum ipmi_addr_src {
+       SI_INVALID = 0, SI_HOTMOD, SI_HARDCODED, SI_SPMI, SI_ACPI, SI_SMBIOS,
+       SI_PCI, SI_DEVICETREE, SI_DEFAULT
+};
+static char *ipmi_addr_src_to_str[] = { NULL, "hotmod", "hardcoded", "SPMI",
+                                       "ACPI", "SMBIOS", "PCI",
+                                       "device-tree", "default" };
+
 #define DEVICE_NAME "ipmi_si"
 
 static struct platform_driver ipmi_driver = {
@@ -188,7 +196,7 @@ struct smi_info {
        int (*irq_setup)(struct smi_info *info);
        void (*irq_cleanup)(struct smi_info *info);
        unsigned int io_size;
-       char *addr_source; /* ACPI, PCI, SMBIOS, hardcode, default. */
+       enum ipmi_addr_src addr_source; /* ACPI, PCI, SMBIOS, hardcode, etc. */
        void (*addr_source_cleanup)(struct smi_info *info);
        void *addr_source_data;
 
@@ -300,6 +308,7 @@ static int num_max_busy_us;
 
 static int unload_when_empty = 1;
 
+static int add_smi(struct smi_info *smi);
 static int try_smi_init(struct smi_info *smi);
 static void cleanup_one_si(struct smi_info *to_clean);
 
@@ -314,9 +323,14 @@ static void deliver_recv_msg(struct smi_info *smi_info,
 {
        /* Deliver the message to the upper layer with the lock
           released. */
-       spin_unlock(&(smi_info->si_lock));
-       ipmi_smi_msg_received(smi_info->intf, msg);
-       spin_lock(&(smi_info->si_lock));
+
+       if (smi_info->run_to_completion) {
+               ipmi_smi_msg_received(smi_info->intf, msg);
+       } else {
+               spin_unlock(&(smi_info->si_lock));
+               ipmi_smi_msg_received(smi_info->intf, msg);
+               spin_lock(&(smi_info->si_lock));
+       }
 }
 
 static void return_hosed_msg(struct smi_info *smi_info, int cCode)
@@ -445,6 +459,9 @@ static inline void disable_si_irq(struct smi_info *smi_info)
        if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
                start_disable_irq(smi_info);
                smi_info->interrupt_disabled = 1;
+               if (!atomic_read(&smi_info->stop_operation))
+                       mod_timer(&smi_info->si_timer,
+                                 jiffies + SI_TIMEOUT_JIFFIES);
        }
 }
 
@@ -576,9 +593,8 @@ static void handle_transaction_done(struct smi_info *smi_info)
                smi_info->handlers->get_result(smi_info->si_sm, msg, 3);
                if (msg[2] != 0) {
                        /* Error clearing flags */
-                       printk(KERN_WARNING
-                              "ipmi_si: Error clearing flags: %2.2x\n",
-                              msg[2]);
+                       dev_warn(smi_info->dev,
+                                "Error clearing flags: %2.2x\n", msg[2]);
                }
                if (smi_info->si_state == SI_CLEARING_FLAGS_THEN_SET_IRQ)
                        start_enable_irq(smi_info);
@@ -670,9 +686,8 @@ static void handle_transaction_done(struct smi_info *smi_info)
                /* We got the flags from the SMI, now handle them. */
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
                if (msg[2] != 0) {
-                       printk(KERN_WARNING
-                              "ipmi_si: Could not enable interrupts"
-                              ", failed get, using polled mode.\n");
+                       dev_warn(smi_info->dev, "Could not enable interrupts"
+                                ", failed get, using polled mode.\n");
                        smi_info->si_state = SI_NORMAL;
                } else {
                        msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -693,11 +708,11 @@ static void handle_transaction_done(struct smi_info *smi_info)
 
                /* We got the flags from the SMI, now handle them. */
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
-               if (msg[2] != 0) {
-                       printk(KERN_WARNING
-                              "ipmi_si: Could not enable interrupts"
-                              ", failed set, using polled mode.\n");
-               }
+               if (msg[2] != 0)
+                       dev_warn(smi_info->dev, "Could not enable interrupts"
+                                ", failed set, using polled mode.\n");
+               else
+                       smi_info->interrupt_disabled = 0;
                smi_info->si_state = SI_NORMAL;
                break;
        }
@@ -709,9 +724,8 @@ static void handle_transaction_done(struct smi_info *smi_info)
                /* We got the flags from the SMI, now handle them. */
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
                if (msg[2] != 0) {
-                       printk(KERN_WARNING
-                              "ipmi_si: Could not disable interrupts"
-                              ", failed get.\n");
+                       dev_warn(smi_info->dev, "Could not disable interrupts"
+                                ", failed get.\n");
                        smi_info->si_state = SI_NORMAL;
                } else {
                        msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
@@ -733,9 +747,8 @@ static void handle_transaction_done(struct smi_info *smi_info)
                /* We got the flags from the SMI, now handle them. */
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
                if (msg[2] != 0) {
-                       printk(KERN_WARNING
-                              "ipmi_si: Could not disable interrupts"
-                              ", failed set.\n");
+                       dev_warn(smi_info->dev, "Could not disable interrupts"
+                                ", failed set.\n");
                }
                smi_info->si_state = SI_NORMAL;
                break;
@@ -877,6 +890,11 @@ static void sender(void                *send_info,
        printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
 #endif
 
+       mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
+
+       if (smi_info->thread)
+               wake_up_process(smi_info->thread);
+
        if (smi_info->run_to_completion) {
                /*
                 * If we are running to completion, then throw it in
@@ -997,6 +1015,8 @@ static int ipmi_thread(void *data)
                        ; /* do nothing */
                else if (smi_result == SI_SM_CALL_WITH_DELAY && busy_wait)
                        schedule();
+               else if (smi_result == SI_SM_IDLE)
+                       schedule_timeout_interruptible(100);
                else
                        schedule_timeout_interruptible(0);
        }
@@ -1039,6 +1059,7 @@ static void smi_timeout(unsigned long data)
        unsigned long     flags;
        unsigned long     jiffies_now;
        long              time_diff;
+       long              timeout;
 #ifdef DEBUG_TIMING
        struct timeval    t;
 #endif
@@ -1059,9 +1080,9 @@ static void smi_timeout(unsigned long data)
 
        if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
                /* Running with interrupts, only do long timeouts. */
-               smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;
+               timeout = jiffies + SI_TIMEOUT_JIFFIES;
                smi_inc_stat(smi_info, long_timeouts);
-               goto do_add_timer;
+               goto do_mod_timer;
        }
 
        /*
@@ -1070,14 +1091,15 @@ static void smi_timeout(unsigned long data)
         */
        if (smi_result == SI_SM_CALL_WITH_DELAY) {
                smi_inc_stat(smi_info, short_timeouts);
-               smi_info->si_timer.expires = jiffies + 1;
+               timeout = jiffies + 1;
        } else {
                smi_inc_stat(smi_info, long_timeouts);
-               smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;
+               timeout = jiffies + SI_TIMEOUT_JIFFIES;
        }
 
- do_add_timer:
-       add_timer(&(smi_info->si_timer));
+ do_mod_timer:
+       if (smi_result != SI_SM_IDLE)
+               mod_timer(&(smi_info->si_timer), timeout);
 }
 
 static irqreturn_t si_irq_handler(int irq, void *data)
@@ -1144,10 +1166,10 @@ static int smi_start_processing(void       *send_info,
                new_smi->thread = kthread_run(ipmi_thread, new_smi,
                                              "kipmi%d", new_smi->intf_num);
                if (IS_ERR(new_smi->thread)) {
-                       printk(KERN_NOTICE "ipmi_si_intf: Could not start"
-                              " kernel thread due to error %ld, only using"
-                              " timers to drive the interface\n",
-                              PTR_ERR(new_smi->thread));
+                       dev_notice(new_smi->dev, "Could not start"
+                                  " kernel thread due to error %ld, only using"
+                                  " timers to drive the interface\n",
+                                  PTR_ERR(new_smi->thread));
                        new_smi->thread = NULL;
                }
        }
@@ -1308,14 +1330,13 @@ static int std_irq_setup(struct smi_info *info)
                                 DEVICE_NAME,
                                 info);
        if (rv) {
-               printk(KERN_WARNING
-                      "ipmi_si: %s unable to claim interrupt %d,"
-                      " running polled\n",
-                      DEVICE_NAME, info->irq);
+               dev_warn(info->dev, "%s unable to claim interrupt %d,"
+                        " running polled\n",
+                        DEVICE_NAME, info->irq);
                info->irq = 0;
        } else {
                info->irq_cleanup = std_irq_cleanup;
-               printk("  Using irq %d\n", info->irq);
+               dev_info(info->dev, "Using irq %d\n", info->irq);
        }
 
        return rv;
@@ -1406,8 +1427,8 @@ static int port_setup(struct smi_info *info)
                info->io.outputb = port_outl;
                break;
        default:
-               printk(KERN_WARNING "ipmi_si: Invalid register size: %d\n",
-                      info->io.regsize);
+               dev_warn(info->dev, "Invalid register size: %d\n",
+                        info->io.regsize);
                return -EINVAL;
        }
 
@@ -1529,8 +1550,8 @@ static int mem_setup(struct smi_info *info)
                break;
 #endif
        default:
-               printk(KERN_WARNING "ipmi_si: Invalid register size: %d\n",
-                      info->io.regsize);
+               dev_warn(info->dev, "Invalid register size: %d\n",
+                        info->io.regsize);
                return -EINVAL;
        }
 
@@ -1755,7 +1776,7 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
                                goto out;
                        }
 
-                       info->addr_source = "hotmod";
+                       info->addr_source = SI_HOTMOD;
                        info->si_type = si_type;
                        info->io.addr_data = addr;
                        info->io.addr_type = addr_space;
@@ -1777,7 +1798,9 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
                                info->irq_setup = std_irq_setup;
                        info->slave_addr = ipmb;
 
-                       try_smi_init(info);
+                       if (!add_smi(info))
+                               if (try_smi_init(info))
+                                       cleanup_one_si(info);
                } else {
                        /* remove */
                        struct smi_info *e, *tmp_e;
@@ -1813,7 +1836,8 @@ static __devinit void hardcode_find_bmc(void)
                if (!info)
                        return;
 
-               info->addr_source = "hardcoded";
+               info->addr_source = SI_HARDCODED;
+               printk(KERN_INFO PFX "probing via hardcoded address\n");
 
                if (!si_type[i] || strcmp(si_type[i], "kcs") == 0) {
                        info->si_type = SI_KCS;
@@ -1822,8 +1846,7 @@ static __devinit void hardcode_find_bmc(void)
                } else if (strcmp(si_type[i], "bt") == 0) {
                        info->si_type = SI_BT;
                } else {
-                       printk(KERN_WARNING
-                              "ipmi_si: Interface type specified "
+                       printk(KERN_WARNING PFX "Interface type specified "
                               "for interface %d, was invalid: %s\n",
                               i, si_type[i]);
                        kfree(info);
@@ -1841,11 +1864,9 @@ static __devinit void hardcode_find_bmc(void)
                        info->io.addr_data = addrs[i];
                        info->io.addr_type = IPMI_MEM_ADDR_SPACE;
                } else {
-                       printk(KERN_WARNING
-                              "ipmi_si: Interface type specified "
-                              "for interface %d, "
-                              "but port and address were not set or "
-                              "set to zero.\n", i);
+                       printk(KERN_WARNING PFX "Interface type specified "
+                              "for interface %d, but port and address were "
+                              "not set or set to zero.\n", i);
                        kfree(info);
                        continue;
                }
@@ -1863,7 +1884,9 @@ static __devinit void hardcode_find_bmc(void)
                        info->irq_setup = std_irq_setup;
                info->slave_addr = slave_addrs[i];
 
-               try_smi_init(info);
+               if (!add_smi(info))
+                       if (try_smi_init(info))
+                               cleanup_one_si(info);
        }
 }
 
@@ -1923,15 +1946,13 @@ static int acpi_gpe_irq_setup(struct smi_info *info)
                                          &ipmi_acpi_gpe,
                                          info);
        if (status != AE_OK) {
-               printk(KERN_WARNING
-                      "ipmi_si: %s unable to claim ACPI GPE %d,"
-                      " running polled\n",
-                      DEVICE_NAME, info->irq);
+               dev_warn(info->dev, "%s unable to claim ACPI GPE %d,"
+                        " running polled\n", DEVICE_NAME, info->irq);
                info->irq = 0;
                return -EINVAL;
        } else {
                info->irq_cleanup = acpi_gpe_irq_cleanup;
-               printk("  Using ACPI GPE %d\n", info->irq);
+               dev_info(info->dev, "Using ACPI GPE %d\n", info->irq);
                return 0;
        }
 }
@@ -1989,8 +2010,8 @@ static __devinit int try_init_spmi(struct SPMITable *spmi)
        u8               addr_space;
 
        if (spmi->IPMIlegacy != 1) {
-           printk(KERN_INFO "IPMI: Bad SPMI legacy %d\n", spmi->IPMIlegacy);
-           return -ENODEV;
+               printk(KERN_INFO PFX "Bad SPMI legacy %d\n", spmi->IPMIlegacy);
+               return -ENODEV;
        }
 
        if (spmi->addr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
@@ -2000,11 +2021,12 @@ static __devinit int try_init_spmi(struct SPMITable *spmi)
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
-               printk(KERN_ERR "ipmi_si: Could not allocate SI data (3)\n");
+               printk(KERN_ERR PFX "Could not allocate SI data (3)\n");
                return -ENOMEM;
        }
 
-       info->addr_source = "SPMI";
+       info->addr_source = SI_SPMI;
+       printk(KERN_INFO PFX "probing via SPMI\n");
 
        /* Figure out the interface type. */
        switch (spmi->InterfaceType) {
@@ -2018,8 +2040,8 @@ static __devinit int try_init_spmi(struct SPMITable *spmi)
                info->si_type = SI_BT;
                break;
        default:
-               printk(KERN_INFO "ipmi_si: Unknown ACPI/SPMI SI type %d\n",
-                       spmi->InterfaceType);
+               printk(KERN_INFO PFX "Unknown ACPI/SPMI SI type %d\n",
+                      spmi->InterfaceType);
                kfree(info);
                return -EIO;
        }
@@ -2055,13 +2077,12 @@ static __devinit int try_init_spmi(struct SPMITable *spmi)
                info->io.addr_type = IPMI_IO_ADDR_SPACE;
        } else {
                kfree(info);
-               printk(KERN_WARNING
-                      "ipmi_si: Unknown ACPI I/O Address type\n");
+               printk(KERN_WARNING PFX "Unknown ACPI I/O Address type\n");
                return -EIO;
        }
        info->io.addr_data = spmi->addr.address;
 
-       try_smi_init(info);
+       add_smi(info);
 
        return 0;
 }
@@ -2093,6 +2114,7 @@ static int __devinit ipmi_pnp_probe(struct pnp_dev *dev,
 {
        struct acpi_device *acpi_dev;
        struct smi_info *info;
+       struct resource *res;
        acpi_handle handle;
        acpi_status status;
        unsigned long long tmp;
@@ -2105,7 +2127,8 @@ static int __devinit ipmi_pnp_probe(struct pnp_dev *dev,
        if (!info)
                return -ENOMEM;
 
-       info->addr_source = "ACPI";
+       info->addr_source = SI_ACPI;
+       printk(KERN_INFO PFX "probing via ACPI\n");
 
        handle = acpi_dev->handle;
 
@@ -2125,22 +2148,26 @@ static int __devinit ipmi_pnp_probe(struct pnp_dev *dev,
                info->si_type = SI_BT;
                break;
        default:
-               dev_info(&dev->dev, "unknown interface type %lld\n", tmp);
+               dev_info(&dev->dev, "unknown IPMI type %lld\n", tmp);
                goto err_free;
        }
 
-       if (pnp_port_valid(dev, 0)) {
+       res = pnp_get_resource(dev, IORESOURCE_IO, 0);
+       if (res) {
                info->io_setup = port_setup;
                info->io.addr_type = IPMI_IO_ADDR_SPACE;
-               info->io.addr_data = pnp_port_start(dev, 0);
-       } else if (pnp_mem_valid(dev, 0)) {
-               info->io_setup = mem_setup;
-               info->io.addr_type = IPMI_MEM_ADDR_SPACE;
-               info->io.addr_data = pnp_mem_start(dev, 0);
        } else {
+               res = pnp_get_resource(dev, IORESOURCE_MEM, 0);
+               if (res) {
+                       info->io_setup = mem_setup;
+                       info->io.addr_type = IPMI_MEM_ADDR_SPACE;
+               }
+       }
+       if (!res) {
                dev_err(&dev->dev, "no I/O or memory address\n");
                goto err_free;
        }
+       info->io.addr_data = res->start;
 
        info->io.regspacing = DEFAULT_REGSPACING;
        info->io.regsize = DEFAULT_REGSPACING;
@@ -2156,10 +2183,14 @@ static int __devinit ipmi_pnp_probe(struct pnp_dev *dev,
                info->irq_setup = std_irq_setup;
        }
 
-       info->dev = &acpi_dev->dev;
+       info->dev = &dev->dev;
        pnp_set_drvdata(dev, info);
 
-       return try_smi_init(info);
+       dev_info(info->dev, "%pR regsize %d spacing %d irq %d\n",
+                res, info->io.regsize, info->io.regspacing,
+                info->irq);
+
+       return add_smi(info);
 
 err_free:
        kfree(info);
@@ -2264,12 +2295,12 @@ static __devinit void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
-               printk(KERN_ERR
-                      "ipmi_si: Could not allocate SI data\n");
+               printk(KERN_ERR PFX "Could not allocate SI data\n");
                return;
        }
 
-       info->addr_source = "SMBIOS";
+       info->addr_source = SI_SMBIOS;
+       printk(KERN_INFO PFX "probing via SMBIOS\n");
 
        switch (ipmi_data->type) {
        case 0x01: /* KCS */
@@ -2299,8 +2330,7 @@ static __devinit void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
 
        default:
                kfree(info);
-               printk(KERN_WARNING
-                      "ipmi_si: Unknown SMBIOS I/O Address type: %d.\n",
+               printk(KERN_WARNING PFX "Unknown SMBIOS I/O Address type: %d\n",
                       ipmi_data->addr_space);
                return;
        }
@@ -2318,7 +2348,7 @@ static __devinit void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
        if (info->irq)
                info->irq_setup = std_irq_setup;
 
-       try_smi_init(info);
+       add_smi(info);
 }
 
 static void __devinit dmi_find_bmc(void)
@@ -2368,7 +2398,8 @@ static int __devinit ipmi_pci_probe(struct pci_dev *pdev,
        if (!info)
                return -ENOMEM;
 
-       info->addr_source = "PCI";
+       info->addr_source = SI_PCI;
+       dev_info(&pdev->dev, "probing via PCI");
 
        switch (class_type) {
        case PCI_ERMC_CLASSCODE_TYPE_SMIC:
@@ -2385,15 +2416,13 @@ static int __devinit ipmi_pci_probe(struct pci_dev *pdev,
 
        default:
                kfree(info);
-               printk(KERN_INFO "ipmi_si: %s: Unknown IPMI type: %d\n",
-                      pci_name(pdev), class_type);
+               dev_info(&pdev->dev, "Unknown IPMI type: %d\n", class_type);
                return -ENOMEM;
        }
 
        rv = pci_enable_device(pdev);
        if (rv) {
-               printk(KERN_ERR "ipmi_si: %s: couldn't enable PCI device\n",
-                      pci_name(pdev));
+               dev_err(&pdev->dev, "couldn't enable PCI device\n");
                kfree(info);
                return rv;
        }
@@ -2421,7 +2450,11 @@ static int __devinit ipmi_pci_probe(struct pci_dev *pdev,
        info->dev = &pdev->dev;
        pci_set_drvdata(pdev, info);
 
-       return try_smi_init(info);
+       dev_info(&pdev->dev, "%pR regsize %d spacing %d irq %d\n",
+               &pdev->resource[0], info->io.regsize, info->io.regspacing,
+               info->irq);
+
+       return add_smi(info);
 }
 
 static void __devexit ipmi_pci_remove(struct pci_dev *pdev)
@@ -2473,7 +2506,7 @@ static int __devinit ipmi_of_probe(struct of_device *dev,
        int ret;
        int proplen;
 
-       dev_info(&dev->dev, PFX "probing via device tree\n");
+       dev_info(&dev->dev, "probing via device tree\n");
 
        ret = of_address_to_resource(np, 0, &resource);
        if (ret) {
@@ -2503,12 +2536,12 @@ static int __devinit ipmi_of_probe(struct of_device *dev,
 
        if (!info) {
                dev_err(&dev->dev,
-                       PFX "could not allocate memory for OF probe\n");
+                       "could not allocate memory for OF probe\n");
                return -ENOMEM;
        }
 
        info->si_type           = (enum si_type) match->data;
-       info->addr_source       = "device-tree";
+       info->addr_source       = SI_DEVICETREE;
        info->irq_setup         = std_irq_setup;
 
        if (resource.flags & IORESOURCE_IO) {
@@ -2528,13 +2561,13 @@ static int __devinit ipmi_of_probe(struct of_device *dev,
        info->irq               = irq_of_parse_and_map(dev->dev.of_node, 0);
        info->dev               = &dev->dev;
 
-       dev_dbg(&dev->dev, "addr 0x%lx regsize %d spacing %d irq %x\n",
+       dev_dbg(&dev->dev, "addr 0x%lx regsize %d spacing %d irq %d\n",
                info->io.addr_data, info->io.regsize, info->io.regspacing,
                info->irq);
 
        dev_set_drvdata(&dev->dev, info);
 
-       return try_smi_init(info);
+       return add_smi(info);
 }
 
 static int __devexit ipmi_of_remove(struct of_device *dev)
@@ -2643,9 +2676,8 @@ static int try_enable_event_buffer(struct smi_info *smi_info)
 
        rv = wait_for_msg_done(smi_info);
        if (rv) {
-               printk(KERN_WARNING
-                      "ipmi_si: Error getting response from get global,"
-                      " enables command, the event buffer is not"
+               printk(KERN_WARNING PFX "Error getting response from get"
+                      " global enables command, the event buffer is not"
                       " enabled.\n");
                goto out;
        }
@@ -2657,10 +2689,8 @@ static int try_enable_event_buffer(struct smi_info *smi_info)
                        resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
                        resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD   ||
                        resp[2] != 0) {
-               printk(KERN_WARNING
-                      "ipmi_si: Invalid return from get global"
-                      " enables command, cannot enable the event"
-                      " buffer.\n");
+               printk(KERN_WARNING PFX "Invalid return from get global"
+                      " enables command, cannot enable the event buffer.\n");
                rv = -EINVAL;
                goto out;
        }
@@ -2676,9 +2706,8 @@ static int try_enable_event_buffer(struct smi_info *smi_info)
 
        rv = wait_for_msg_done(smi_info);
        if (rv) {
-               printk(KERN_WARNING
-                      "ipmi_si: Error getting response from set global,"
-                      " enables command, the event buffer is not"
+               printk(KERN_WARNING PFX "Error getting response from set"
+                      " global, enables command, the event buffer is not"
                       " enabled.\n");
                goto out;
        }
@@ -2689,10 +2718,8 @@ static int try_enable_event_buffer(struct smi_info *smi_info)
        if (resp_len < 3 ||
                        resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
                        resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) {
-               printk(KERN_WARNING
-                      "ipmi_si: Invalid return from get global,"
-                      "enables command, not enable the event"
-                      " buffer.\n");
+               printk(KERN_WARNING PFX "Invalid return from get global,"
+                      "enables command, not enable the event buffer.\n");
                rv = -EINVAL;
                goto out;
        }
@@ -2951,7 +2978,7 @@ static __devinit void default_find_bmc(void)
                if (!info)
                        return;
 
-               info->addr_source = NULL;
+               info->addr_source = SI_DEFAULT;
 
                info->si_type = ipmi_defaults[i].type;
                info->io_setup = port_setup;
@@ -2963,14 +2990,16 @@ static __devinit void default_find_bmc(void)
                info->io.regsize = DEFAULT_REGSPACING;
                info->io.regshift = 0;
 
-               if (try_smi_init(info) == 0) {
-                       /* Found one... */
-                       printk(KERN_INFO "ipmi_si: Found default %s state"
-                              " machine at %s address 0x%lx\n",
-                              si_to_str[info->si_type],
-                              addr_space_to_str[info->io.addr_type],
-                              info->io.addr_data);
-                       return;
+               if (add_smi(info) == 0) {
+                       if ((try_smi_init(info)) == 0) {
+                               /* Found one... */
+                               printk(KERN_INFO PFX "Found default %s"
+                               " state machine at %s address 0x%lx\n",
+                               si_to_str[info->si_type],
+                               addr_space_to_str[info->io.addr_type],
+                               info->io.addr_data);
+                       } else
+                               cleanup_one_si(info);
                }
        }
 }
@@ -2989,34 +3018,48 @@ static int is_new_interface(struct smi_info *info)
        return 1;
 }
 
-static int try_smi_init(struct smi_info *new_smi)
+static int add_smi(struct smi_info *new_smi)
 {
-       int rv;
-       int i;
-
-       if (new_smi->addr_source) {
-               printk(KERN_INFO "ipmi_si: Trying %s-specified %s state"
-                      " machine at %s address 0x%lx, slave address 0x%x,"
-                      " irq %d\n",
-                      new_smi->addr_source,
-                      si_to_str[new_smi->si_type],
-                      addr_space_to_str[new_smi->io.addr_type],
-                      new_smi->io.addr_data,
-                      new_smi->slave_addr, new_smi->irq);
-       }
+       int rv = 0;
 
+       printk(KERN_INFO PFX "Adding %s-specified %s state machine",
+                       ipmi_addr_src_to_str[new_smi->addr_source],
+                       si_to_str[new_smi->si_type]);
        mutex_lock(&smi_infos_lock);
        if (!is_new_interface(new_smi)) {
-               printk(KERN_WARNING "ipmi_si: duplicate interface\n");
+               printk(KERN_CONT PFX "duplicate interface\n");
                rv = -EBUSY;
                goto out_err;
        }
 
+       printk(KERN_CONT "\n");
+
        /* So we know not to free it unless we have allocated one. */
        new_smi->intf = NULL;
        new_smi->si_sm = NULL;
        new_smi->handlers = NULL;
 
+       list_add_tail(&new_smi->link, &smi_infos);
+
+out_err:
+       mutex_unlock(&smi_infos_lock);
+       return rv;
+}
+
+static int try_smi_init(struct smi_info *new_smi)
+{
+       int rv = 0;
+       int i;
+
+       printk(KERN_INFO PFX "Trying %s-specified %s state"
+              " machine at %s address 0x%lx, slave address 0x%x,"
+              " irq %d\n",
+              ipmi_addr_src_to_str[new_smi->addr_source],
+              si_to_str[new_smi->si_type],
+              addr_space_to_str[new_smi->io.addr_type],
+              new_smi->io.addr_data,
+              new_smi->slave_addr, new_smi->irq);
+
        switch (new_smi->si_type) {
        case SI_KCS:
                new_smi->handlers = &kcs_smi_handlers;
@@ -3039,7 +3082,8 @@ static int try_smi_init(struct smi_info *new_smi)
        /* Allocate the state machine's data and initialize it. */
        new_smi->si_sm = kmalloc(new_smi->handlers->size(), GFP_KERNEL);
        if (!new_smi->si_sm) {
-               printk(KERN_ERR "Could not allocate state machine memory\n");
+               printk(KERN_ERR PFX
+                      "Could not allocate state machine memory\n");
                rv = -ENOMEM;
                goto out_err;
        }
@@ -3049,7 +3093,7 @@ static int try_smi_init(struct smi_info *new_smi)
        /* Now that we know the I/O size, we can set up the I/O. */
        rv = new_smi->io_setup(new_smi);
        if (rv) {
-               printk(KERN_ERR "Could not set up I/O space\n");
+               printk(KERN_ERR PFX "Could not set up I/O space\n");
                goto out_err;
        }
 
@@ -3059,8 +3103,7 @@ static int try_smi_init(struct smi_info *new_smi)
        /* Do low-level detection first. */
        if (new_smi->handlers->detect(new_smi->si_sm)) {
                if (new_smi->addr_source)
-                       printk(KERN_INFO "ipmi_si: Interface detection"
-                              " failed\n");
+                       printk(KERN_INFO PFX "Interface detection failed\n");
                rv = -ENODEV;
                goto out_err;
        }
@@ -3072,7 +3115,7 @@ static int try_smi_init(struct smi_info *new_smi)
        rv = try_get_dev_id(new_smi);
        if (rv) {
                if (new_smi->addr_source)
-                       printk(KERN_INFO "ipmi_si: There appears to be no BMC"
+                       printk(KERN_INFO PFX "There appears to be no BMC"
                               " at this location\n");
                goto out_err;
        }
@@ -3088,7 +3131,7 @@ static int try_smi_init(struct smi_info *new_smi)
        for (i = 0; i < SI_NUM_STATS; i++)
                atomic_set(&new_smi->stats[i], 0);
 
-       new_smi->interrupt_disabled = 0;
+       new_smi->interrupt_disabled = 1;
        atomic_set(&new_smi->stop_operation, 0);
        new_smi->intf_num = smi_num;
        smi_num++;
@@ -3114,9 +3157,8 @@ static int try_smi_init(struct smi_info *new_smi)
                new_smi->pdev = platform_device_alloc("ipmi_si",
                                                      new_smi->intf_num);
                if (!new_smi->pdev) {
-                       printk(KERN_ERR
-                              "ipmi_si_intf:"
-                              " Unable to allocate platform device\n");
+                       printk(KERN_ERR PFX
+                              "Unable to allocate platform device\n");
                        goto out_err;
                }
                new_smi->dev = &new_smi->pdev->dev;
@@ -3124,9 +3166,8 @@ static int try_smi_init(struct smi_info *new_smi)
 
                rv = platform_device_add(new_smi->pdev);
                if (rv) {
-                       printk(KERN_ERR
-                              "ipmi_si_intf:"
-                              " Unable to register system interface device:"
+                       printk(KERN_ERR PFX
+                              "Unable to register system interface device:"
                               " %d\n",
                               rv);
                        goto out_err;
@@ -3141,9 +3182,8 @@ static int try_smi_init(struct smi_info *new_smi)
                               "bmc",
                               new_smi->slave_addr);
        if (rv) {
-               printk(KERN_ERR
-                      "ipmi_si: Unable to register device: error %d\n",
-                      rv);
+               dev_err(new_smi->dev, "Unable to register device: error %d\n",
+                       rv);
                goto out_err_stop_timer;
        }
 
@@ -3151,9 +3191,7 @@ static int try_smi_init(struct smi_info *new_smi)
                                     type_file_read_proc,
                                     new_smi);
        if (rv) {
-               printk(KERN_ERR
-                      "ipmi_si: Unable to create proc entry: %d\n",
-                      rv);
+               dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv);
                goto out_err_stop_timer;
        }
 
@@ -3161,9 +3199,7 @@ static int try_smi_init(struct smi_info *new_smi)
                                     stat_file_read_proc,
                                     new_smi);
        if (rv) {
-               printk(KERN_ERR
-                      "ipmi_si: Unable to create proc entry: %d\n",
-                      rv);
+               dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv);
                goto out_err_stop_timer;
        }
 
@@ -3171,18 +3207,12 @@ static int try_smi_init(struct smi_info *new_smi)
                                     param_read_proc,
                                     new_smi);
        if (rv) {
-               printk(KERN_ERR
-                      "ipmi_si: Unable to create proc entry: %d\n",
-                      rv);
+               dev_err(new_smi->dev, "Unable to create proc entry: %d\n", rv);
                goto out_err_stop_timer;
        }
 
-       list_add_tail(&new_smi->link, &smi_infos);
-
-       mutex_unlock(&smi_infos_lock);
-
-       printk(KERN_INFO "IPMI %s interface initialized\n",
-              si_to_str[new_smi->si_type]);
+       dev_info(new_smi->dev, "IPMI %s interface initialized\n",
+                si_to_str[new_smi->si_type]);
 
        return 0;
 
@@ -3191,11 +3221,17 @@ static int try_smi_init(struct smi_info *new_smi)
        wait_for_timer_and_thread(new_smi);
 
  out_err:
-       if (new_smi->intf)
+       new_smi->interrupt_disabled = 1;
+
+       if (new_smi->intf) {
                ipmi_unregister_smi(new_smi->intf);
+               new_smi->intf = NULL;
+       }
 
-       if (new_smi->irq_cleanup)
+       if (new_smi->irq_cleanup) {
                new_smi->irq_cleanup(new_smi);
+               new_smi->irq_cleanup = NULL;
+       }
 
        /*
         * Wait until we know that we are out of any interrupt
@@ -3208,18 +3244,21 @@ static int try_smi_init(struct smi_info *new_smi)
                if (new_smi->handlers)
                        new_smi->handlers->cleanup(new_smi->si_sm);
                kfree(new_smi->si_sm);
+               new_smi->si_sm = NULL;
        }
-       if (new_smi->addr_source_cleanup)
+       if (new_smi->addr_source_cleanup) {
                new_smi->addr_source_cleanup(new_smi);
-       if (new_smi->io_cleanup)
+               new_smi->addr_source_cleanup = NULL;
+       }
+       if (new_smi->io_cleanup) {
                new_smi->io_cleanup(new_smi);
+               new_smi->io_cleanup = NULL;
+       }
 
-       if (new_smi->dev_registered)
+       if (new_smi->dev_registered) {
                platform_device_unregister(new_smi->pdev);
-
-       kfree(new_smi);
-
-       mutex_unlock(&smi_infos_lock);
+               new_smi->dev_registered = 0;
+       }
 
        return rv;
 }
@@ -3229,6 +3268,8 @@ static __devinit int init_ipmi_si(void)
        int  i;
        char *str;
        int  rv;
+       struct smi_info *e;
+       enum ipmi_addr_src type = SI_INVALID;
 
        if (initialized)
                return 0;
@@ -3237,9 +3278,7 @@ static __devinit int init_ipmi_si(void)
        /* Register the device drivers. */
        rv = driver_register(&ipmi_driver.driver);
        if (rv) {
-               printk(KERN_ERR
-                      "init_ipmi_si: Unable to register driver: %d\n",
-                      rv);
+               printk(KERN_ERR PFX "Unable to register driver: %d\n", rv);
                return rv;
        }
 
@@ -3263,38 +3302,81 @@ static __devinit int init_ipmi_si(void)
 
        hardcode_find_bmc();
 
-#ifdef CONFIG_DMI
-       dmi_find_bmc();
-#endif
+       /* If the user gave us a device, they presumably want us to use it */
+       mutex_lock(&smi_infos_lock);
+       if (!list_empty(&smi_infos)) {
+               mutex_unlock(&smi_infos_lock);
+               return 0;
+       }
+       mutex_unlock(&smi_infos_lock);
 
-#ifdef CONFIG_ACPI
-       spmi_find_bmc();
+#ifdef CONFIG_PCI
+       rv = pci_register_driver(&ipmi_pci_driver);
+       if (rv)
+               printk(KERN_ERR PFX "Unable to register PCI driver: %d\n", rv);
 #endif
+
 #ifdef CONFIG_ACPI
        pnp_register_driver(&ipmi_pnp_driver);
 #endif
 
-#ifdef CONFIG_PCI
-       rv = pci_register_driver(&ipmi_pci_driver);
-       if (rv)
-               printk(KERN_ERR
-                      "init_ipmi_si: Unable to register PCI driver: %d\n",
-                      rv);
+#ifdef CONFIG_DMI
+       dmi_find_bmc();
+#endif
+
+#ifdef CONFIG_ACPI
+       spmi_find_bmc();
 #endif
 
 #ifdef CONFIG_PPC_OF
        of_register_platform_driver(&ipmi_of_platform_driver);
 #endif
 
+       /* We prefer devices with interrupts, but in the case of a machine
+          with multiple BMCs we assume that there will be several instances
+          of a given type.  So if we succeed in registering a type, we also
+          try to register everything else of the same type. */
+
+       mutex_lock(&smi_infos_lock);
+       list_for_each_entry(e, &smi_infos, link) {
+               /* Try to register a device if it has an IRQ and we either
+                  haven't successfully registered a device yet or this
+                  device has the same type as one we successfully registered */
+               if (e->irq && (!type || e->addr_source == type)) {
+                       if (!try_smi_init(e)) {
+                               type = e->addr_source;
+                       }
+               }
+       }
+
+       /* type will only have been set if we successfully registered an si */
+       if (type) {
+               mutex_unlock(&smi_infos_lock);
+               return 0;
+       }
+
+       /* Fall back to the preferred device */
+
+       list_for_each_entry(e, &smi_infos, link) {
+               if (!e->irq && (!type || e->addr_source == type)) {
+                       if (!try_smi_init(e)) {
+                               type = e->addr_source;
+                       }
+               }
+       }
+       mutex_unlock(&smi_infos_lock);
+
+       if (type)
+               return 0;
+
        if (si_trydefaults) {
                mutex_lock(&smi_infos_lock);
                if (list_empty(&smi_infos)) {
                        /* No BMC was found, try defaults. */
                        mutex_unlock(&smi_infos_lock);
                        default_find_bmc();
-               } else {
+               } else
                        mutex_unlock(&smi_infos_lock);
-               }
        }
 
        mutex_lock(&smi_infos_lock);
@@ -3308,8 +3390,8 @@ static __devinit int init_ipmi_si(void)
                of_unregister_platform_driver(&ipmi_of_platform_driver);
 #endif
                driver_unregister(&ipmi_driver.driver);
-               printk(KERN_WARNING
-                      "ipmi_si: Unable to find any System Interface(s)\n");
+               printk(KERN_WARNING PFX
+                      "Unable to find any System Interface(s)\n");
                return -ENODEV;
        } else {
                mutex_unlock(&smi_infos_lock);
@@ -3320,7 +3402,7 @@ module_init(init_ipmi_si);
 
 static void cleanup_one_si(struct smi_info *to_clean)
 {
-       int           rv;
+       int           rv = 0;
        unsigned long flags;
 
        if (!to_clean)
@@ -3364,14 +3446,16 @@ static void cleanup_one_si(struct smi_info *to_clean)
                schedule_timeout_uninterruptible(1);
        }
 
-       rv = ipmi_unregister_smi(to_clean->intf);
+       if (to_clean->intf)
+               rv = ipmi_unregister_smi(to_clean->intf);
+
        if (rv) {
-               printk(KERN_ERR
-                      "ipmi_si: Unable to unregister device: errno=%d\n",
+               printk(KERN_ERR PFX "Unable to unregister device: errno=%d\n",
                       rv);
        }
 
-       to_clean->handlers->cleanup(to_clean->si_sm);
+       if (to_clean->handlers)
+               to_clean->handlers->cleanup(to_clean->si_sm);
 
        kfree(to_clean->si_sm);
 
index fdd3754..02abfdd 100644 (file)
@@ -287,12 +287,10 @@ static int register_device (int minor, struct pp_struct *pp)
        char *name;
        int fl;
 
-       name = kmalloc (strlen (CHRDEV) + 3, GFP_KERNEL);
+       name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
        if (name == NULL)
                return -ENOMEM;
 
-       sprintf (name, CHRDEV "%x", minor);
-
        port = parport_find_number (minor);
        if (!port) {
                printk (KERN_WARNING "%s: no associated port!\n", name);
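
The ppdev change above folds a three-step strlen()/kmalloc()/sprintf() sequence into one kasprintf() call, which sizes the buffer exactly and returns NULL on allocation failure. A userspace sketch of the same idiom using GNU libc's asprintf() (the "pp" name prefix is illustrative, not necessarily ppdev's actual CHRDEV string):

#define _GNU_SOURCE		/* for asprintf() */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int minor = 3;
	char *name;

	/* One call allocates and formats; no size arithmetic to get wrong. */
	if (asprintf(&name, "pp%x", minor) < 0)
		return 1;	/* allocation failed (kasprintf returns NULL) */
	puts(name);		/* "pp3" */
	free(name);		/* the caller still owns and frees the buffer */
	return 0;
}
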
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
new file mode 100644 (file)
index 0000000..74f00b5
--- /dev/null
@@ -0,0 +1,162 @@
+/*
+ * RAM Oops/Panic logger
+ *
+ * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kmsg_dump.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+
+#define RAMOOPS_KERNMSG_HDR "===="
+#define RAMOOPS_HEADER_SIZE   (5 + sizeof(struct timeval))
+
+#define RECORD_SIZE 4096
+
+static ulong mem_address;
+module_param(mem_address, ulong, 0400);
+MODULE_PARM_DESC(mem_address,
+               "start of reserved RAM used to store oops/panic logs");
+
+static ulong mem_size;
+module_param(mem_size, ulong, 0400);
+MODULE_PARM_DESC(mem_size,
+               "size of reserved RAM used to store oops/panic logs");
+
+static int dump_oops = 1;
+module_param(dump_oops, int, 0600);
+MODULE_PARM_DESC(dump_oops,
+               "set to 1 to dump oopses, 0 to only dump panics (default 1)");
+
+static struct ramoops_context {
+       struct kmsg_dumper dump;
+       void *virt_addr;
+       phys_addr_t phys_addr;
+       unsigned long size;
+       int count;
+       int max_count;
+} oops_cxt;
+
+static void ramoops_do_dump(struct kmsg_dumper *dumper,
+               enum kmsg_dump_reason reason, const char *s1, unsigned long l1,
+               const char *s2, unsigned long l2)
+{
+       struct ramoops_context *cxt = container_of(dumper,
+                       struct ramoops_context, dump);
+       unsigned long s1_start, s2_start;
+       unsigned long l1_cpy, l2_cpy;
+       int res;
+       char *buf;
+       struct timeval timestamp;
+
+       /* Only dump oopses if dump_oops is set */
+       if (reason == KMSG_DUMP_OOPS && !dump_oops)
+               return;
+
+       buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE));
+       memset(buf, '\0', RECORD_SIZE);
+       res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR);
+       buf += res;
+       do_gettimeofday(&timestamp);
+       res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec);
+       buf += res;
+
+       l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE));
+       l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE) - l2_cpy);
+
+       s2_start = l2 - l2_cpy;
+       s1_start = l1 - l1_cpy;
+
+       memcpy(buf, s1 + s1_start, l1_cpy);
+       memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy);
+
+       cxt->count = (cxt->count + 1) % cxt->max_count;
+}
+
+static int __init ramoops_init(void)
+{
+       struct ramoops_context *cxt = &oops_cxt;
+       int err = -EINVAL;
+
+       if (!mem_size) {
+               printk(KERN_ERR "ramoops: invalid size specification");
+               goto fail3;
+       }
+
+       mem_size = rounddown_pow_of_two(mem_size);
+
+       if (mem_size < RECORD_SIZE) {
+               printk(KERN_ERR "ramoops: size too small");
+               goto fail3;
+       }
+
+       cxt->max_count = mem_size / RECORD_SIZE;
+       cxt->count = 0;
+       cxt->size = mem_size;
+       cxt->phys_addr = mem_address;
+
+       if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
+               printk(KERN_ERR "ramoops: request mem region failed");
+               err = -EINVAL;
+               goto fail3;
+       }
+
+       cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size);
+       if (!cxt->virt_addr) {
+               printk(KERN_ERR "ramoops: ioremap failed\n");
+               goto fail2;
+       }
+
+       cxt->dump.dump = ramoops_do_dump;
+       err = kmsg_dump_register(&cxt->dump);
+       if (err) {
+               printk(KERN_ERR "ramoops: registering kmsg dumper failed");
+               goto fail1;
+       }
+
+       return 0;
+
+fail1:
+       iounmap(cxt->virt_addr);
+fail2:
+       release_mem_region(cxt->phys_addr, cxt->size);
+fail3:
+       return err;
+}
+
+static void __exit ramoops_exit(void)
+{
+       struct ramoops_context *cxt = &oops_cxt;
+
+       if (kmsg_dump_unregister(&cxt->dump) < 0)
+               printk(KERN_WARNING "ramoops: could not unregister kmsg_dumper");
+
+       iounmap(cxt->virt_addr);
+       release_mem_region(cxt->phys_addr, cxt->size);
+}
+
+
+module_init(ramoops_init);
+module_exit(ramoops_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>");
+MODULE_DESCRIPTION("RAM Oops/Panic logger/driver");
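
The dump path above treats the reserved region as a ring of fixed RECORD_SIZE slots: each dump lands in slot cxt->count, and the counter wraps modulo max_count so the oldest record is overwritten first. A userspace sketch of that circular indexing (region size and messages are illustrative):

#include <stdio.h>
#include <string.h>

#define RECORD_SIZE 4096
#define MEM_SIZE    (8 * RECORD_SIZE)	/* illustrative region size */

static char region[MEM_SIZE];		/* stands in for the ioremap()ed RAM */
static int count, max_count = MEM_SIZE / RECORD_SIZE;

/* Copy one dump into the next slot, wrapping like ramoops_do_dump(). */
static void store_record(const char *msg)
{
	char *slot = region + count * RECORD_SIZE;

	memset(slot, 0, RECORD_SIZE);
	snprintf(slot, RECORD_SIZE, "====%s", msg);
	count = (count + 1) % max_count;	/* oldest slot is reused first */
}

int main(void)
{
	for (int i = 0; i < 10; i++) {		/* 10 dumps into 8 slots: wraps */
		char msg[32];

		snprintf(msg, sizeof(msg), "oops #%d", i);
		store_record(msg);
	}
	printf("slot 0 now holds: %s\n", region);	/* "====oops #8" */
	return 0;
}
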
index bd1d116..7cdb6ee 100644 (file)
@@ -3967,13 +3967,9 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op)
        font.charcount = op->charcount;
        font.height = op->height;
        font.width = op->width;
-       font.data = kmalloc(size, GFP_KERNEL);
-       if (!font.data)
-               return -ENOMEM;
-       if (copy_from_user(font.data, op->data, size)) {
-               kfree(font.data);
-               return -EFAULT;
-       }
+       font.data = memdup_user(op->data, size);
+       if (IS_ERR(font.data))
+               return PTR_ERR(font.data);
        acquire_console_sem();
        if (vc->vc_sw->con_font_set)
                rc = vc->vc_sw->con_font_set(vc, &font, op->flags);
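
memdup_user() collapses the kmalloc()/copy_from_user()/kfree()-on-failure dance into one call that returns either the copied buffer or an error encoded in the pointer itself. A userspace sketch of that ERR_PTR convention (memdup() here is a hypothetical stand-in, since there is no user/kernel boundary to cross):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sketch of the kernel's error-pointer helpers: failures come back
 * encoded in the pointer value instead of via a separate status. */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

/* Analogue of memdup_user(): allocate and copy in one step. */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (!p)
		return ERR_PTR(-ENOMEM);
	memcpy(p, src, len);
	return p;
}

int main(void)
{
	void *data = memdup("font bytes", 11);

	if (IS_ERR(data))
		return (int)-PTR_ERR(data);
	puts((char *)data);
	free(data);
	return 0;
}
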
index adc10a2..996c1bd 100644 (file)
@@ -774,7 +774,7 @@ static void i5000_clear_error(struct mem_ctl_info *mci)
 static void i5000_check_error(struct mem_ctl_info *mci)
 {
        struct i5000_error_info info;
-       debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
+       debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
        i5000_get_error_info(mci, &info);
        i5000_process_error_info(mci, &info, 1);
 }
@@ -1353,8 +1353,8 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
        int num_dimms_per_channel;
        int num_csrows;
 
-       debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
-               __func__,
+       debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n",
+               __FILE__, __func__,
                pdev->bus->number,
                PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 
@@ -1389,7 +1389,7 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
                return -ENOMEM;
 
        kobject_get(&mci->edac_mci_kobj);
-       debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
+       debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
 
        mci->dev = &pdev->dev;  /* record ptr  to the generic device */
 
@@ -1432,8 +1432,8 @@ static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
 
        /* add this new MC control structure to EDAC's list of MCs */
        if (edac_mc_add_mc(mci)) {
-               debugf0("MC: " __FILE__
-                       ": %s(): failed edac_mc_add_mc()\n", __func__);
+               debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n",
+                       __FILE__, __func__);
                /* FIXME: perhaps some code should go here that disables error
                 * reporting if we just enabled it
                 */
@@ -1478,7 +1478,7 @@ static int __devinit i5000_init_one(struct pci_dev *pdev,
 {
        int rc;
 
-       debugf0("MC: " __FILE__ ": %s()\n", __func__);
+       debugf0("MC: %s: %s()\n", __FILE__, __func__);
 
        /* wake up device */
        rc = pci_enable_device(pdev);
@@ -1497,7 +1497,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev)
 {
        struct mem_ctl_info *mci;
 
-       debugf0(__FILE__ ": %s()\n", __func__);
+       debugf0("%s: %s()\n", __FILE__, __func__);
 
        if (i5000_pci)
                edac_pci_release_generic_ctl(i5000_pci);
@@ -1544,7 +1544,7 @@ static int __init i5000_init(void)
 {
        int pci_rc;
 
-       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+       debugf2("MC: %s: %s()\n", __FILE__, __func__);
 
        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
        opstate_init();
@@ -1560,7 +1560,7 @@ static int __init i5000_init(void)
  */
 static void __exit i5000_exit(void)
 {
-       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+       debugf2("MC: %s: %s()\n", __FILE__, __func__);
        pci_unregister_driver(&i5000_driver);
 }
 
index f99d106..010c1d6 100644 (file)
@@ -694,7 +694,7 @@ static void i5400_clear_error(struct mem_ctl_info *mci)
 static void i5400_check_error(struct mem_ctl_info *mci)
 {
        struct i5400_error_info info;
-       debugf4("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
+       debugf4("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
        i5400_get_error_info(mci, &info);
        i5400_process_error_info(mci, &info);
 }
@@ -1227,8 +1227,8 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
        if (dev_idx >= ARRAY_SIZE(i5400_devs))
                return -EINVAL;
 
-       debugf0("MC: " __FILE__ ": %s(), pdev bus %u dev=0x%x fn=0x%x\n",
-               __func__,
+       debugf0("MC: %s: %s(), pdev bus %u dev=0x%x fn=0x%x\n",
+               __FILE__, __func__,
                pdev->bus->number,
                PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 
@@ -1256,7 +1256,7 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
        if (mci == NULL)
                return -ENOMEM;
 
-       debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
+       debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
 
        mci->dev = &pdev->dev;  /* record ptr  to the generic device */
 
@@ -1299,8 +1299,8 @@ static int i5400_probe1(struct pci_dev *pdev, int dev_idx)
 
        /* add this new MC control structure to EDAC's list of MCs */
        if (edac_mc_add_mc(mci)) {
-               debugf0("MC: " __FILE__
-                       ": %s(): failed edac_mc_add_mc()\n", __func__);
+               debugf0("MC: %s: %s(): failed edac_mc_add_mc()\n",
+                       __FILE__, __func__);
                /* FIXME: perhaps some code should go here that disables error
                 * reporting if we just enabled it
                 */
@@ -1344,7 +1344,7 @@ static int __devinit i5400_init_one(struct pci_dev *pdev,
 {
        int rc;
 
-       debugf0("MC: " __FILE__ ": %s()\n", __func__);
+       debugf0("MC: %s: %s()\n", __FILE__, __func__);
 
        /* wake up device */
        rc = pci_enable_device(pdev);
@@ -1363,7 +1363,7 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev)
 {
        struct mem_ctl_info *mci;
 
-       debugf0(__FILE__ ": %s()\n", __func__);
+       debugf0("%s: %s()\n", __FILE__, __func__);
 
        if (i5400_pci)
                edac_pci_release_generic_ctl(i5400_pci);
@@ -1409,7 +1409,7 @@ static int __init i5400_init(void)
 {
        int pci_rc;
 
-       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+       debugf2("MC: %s: %s()\n", __FILE__, __func__);
 
        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
        opstate_init();
@@ -1425,7 +1425,7 @@ static int __init i5400_init(void)
  */
 static void __exit i5400_exit(void)
 {
-       debugf2("MC: " __FILE__ ": %s()\n", __func__);
+       debugf2("MC: %s: %s()\n", __FILE__, __func__);
        pci_unregister_driver(&i5400_driver);
 }
 
index 2bf2c50..a2fa1fe 100644 (file)
@@ -178,7 +178,7 @@ static void i82443bxgx_edacmc_check(struct mem_ctl_info *mci)
 {
        struct i82443bxgx_edacmc_error_info info;
 
-       debugf1("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
+       debugf1("MC%d: %s: %s()\n", mci->mc_idx, __FILE__, __func__);
        i82443bxgx_edacmc_get_error_info(mci, &info);
        i82443bxgx_edacmc_process_error_info(mci, &info, 1);
 }
@@ -198,13 +198,13 @@ static void i82443bxgx_init_csrows(struct mem_ctl_info *mci,
        for (index = 0; index < mci->nr_csrows; index++) {
                csrow = &mci->csrows[index];
                pci_read_config_byte(pdev, I82443BXGX_DRB + index, &drbar);
-               debugf1("MC%d: " __FILE__ ": %s() Row=%d DRB = %#0x\n",
-                       mci->mc_idx, __func__, index, drbar);
+               debugf1("MC%d: %s: %s() Row=%d DRB = %#0x\n",
+                       mci->mc_idx, __FILE__, __func__, index, drbar);
                row_high_limit = ((u32) drbar << 23);
                /* find the DRAM Chip Select Base address and mask */
-               debugf1("MC%d: " __FILE__ ": %s() Row=%d, "
-                       "Boundry Address=%#0x, Last = %#0x \n",
-                       mci->mc_idx, __func__, index, row_high_limit,
+               debugf1("MC%d: %s: %s() Row=%d, "
+                       "Boundry Address=%#0x, Last = %#0x\n",
+                       mci->mc_idx, __FILE__, __func__, index, row_high_limit,
                        row_high_limit_last);
 
                /* 440GX goes to 2GB, represented with a DRB of 0. */
@@ -237,7 +237,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
        enum mem_type mtype;
        enum edac_type edac_mode;
 
-       debugf0("MC: " __FILE__ ": %s()\n", __func__);
+       debugf0("MC: %s: %s()\n", __FILE__, __func__);
 
        /* Something is really hosed if PCI config space reads from
         * the MC aren't working.
@@ -250,7 +250,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
        if (mci == NULL)
                return -ENOMEM;
 
-       debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
+       debugf0("MC: %s: %s(): mci = %p\n", __FILE__, __func__, mci);
        mci->dev = &pdev->dev;
        mci->mtype_cap = MEM_FLAG_EDO | MEM_FLAG_SDR | MEM_FLAG_RDR;
        mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
@@ -336,7 +336,7 @@ static int i82443bxgx_edacmc_probe1(struct pci_dev *pdev, int dev_idx)
                        __func__);
        }
 
-       debugf3("MC: " __FILE__ ": %s(): success\n", __func__);
+       debugf3("MC: %s: %s(): success\n", __FILE__, __func__);
        return 0;
 
 fail:
@@ -352,7 +352,7 @@ static int __devinit i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
 {
        int rc;
 
-       debugf0("MC: " __FILE__ ": %s()\n", __func__);
+       debugf0("MC: %s: %s()\n", __FILE__, __func__);
 
        /* don't need to call pci_enable_device() */
        rc = i82443bxgx_edacmc_probe1(pdev, ent->driver_data);
@@ -367,7 +367,7 @@ static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
 {
        struct mem_ctl_info *mci;
 
-       debugf0(__FILE__ ": %s()\n", __func__);
+       debugf0("%s: %s()\n", __FILE__, __func__);
 
        if (i82443bxgx_pci)
                edac_pci_release_generic_ctl(i82443bxgx_pci);
index 5045156..9dcb304 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/timer.h>
 #include <linux/workqueue.h>
 
 #include <asm/atomic.h>
@@ -63,7 +62,7 @@ static size_t config_rom_length = 1 + 4 + 1 + 1;
 #define BIB_CRC(v)             ((v) <<  0)
 #define BIB_CRC_LENGTH(v)      ((v) << 16)
 #define BIB_INFO_LENGTH(v)     ((v) << 24)
-
+#define BIB_BUS_NAME           0x31333934 /* "1394" */
 #define BIB_LINK_SPEED(v)      ((v) <<  0)
 #define BIB_GENERATION(v)      ((v) <<  4)
 #define BIB_MAX_ROM(v)         ((v) <<  8)
@@ -73,7 +72,8 @@ static size_t config_rom_length = 1 + 4 + 1 + 1;
 #define BIB_BMC                        ((1) << 28)
 #define BIB_ISC                        ((1) << 29)
 #define BIB_CMC                        ((1) << 30)
-#define BIB_IMC                        ((1) << 31)
+#define BIB_IRMC               ((1) << 31)
+#define NODE_CAPABILITIES      0x0c0083c0 /* per IEEE 1394 clause 8.3.2.6.5.2 */
 
 static void generate_config_rom(struct fw_card *card, __be32 *config_rom)
 {
@@ -91,18 +91,18 @@ static void generate_config_rom(struct fw_card *card, __be32 *config_rom)
 
        config_rom[0] = cpu_to_be32(
                BIB_CRC_LENGTH(4) | BIB_INFO_LENGTH(4) | BIB_CRC(0));
-       config_rom[1] = cpu_to_be32(0x31333934);
+       config_rom[1] = cpu_to_be32(BIB_BUS_NAME);
        config_rom[2] = cpu_to_be32(
                BIB_LINK_SPEED(card->link_speed) |
                BIB_GENERATION(card->config_rom_generation++ % 14 + 2) |
                BIB_MAX_ROM(2) |
                BIB_MAX_RECEIVE(card->max_receive) |
-               BIB_BMC | BIB_ISC | BIB_CMC | BIB_IMC);
+               BIB_BMC | BIB_ISC | BIB_CMC | BIB_IRMC);
        config_rom[3] = cpu_to_be32(card->guid >> 32);
        config_rom[4] = cpu_to_be32(card->guid);
 
        /* Generate root directory. */
-       config_rom[6] = cpu_to_be32(0x0c0083c0); /* node capabilities */
+       config_rom[6] = cpu_to_be32(NODE_CAPABILITIES);
        i = 7;
        j = 7 + descriptor_count;
 
@@ -407,13 +407,6 @@ static void fw_card_bm_work(struct work_struct *work)
        fw_card_put(card);
 }
 
-static void flush_timer_callback(unsigned long data)
-{
-       struct fw_card *card = (struct fw_card *)data;
-
-       fw_flush_transactions(card);
-}
-
 void fw_card_initialize(struct fw_card *card,
                        const struct fw_card_driver *driver,
                        struct device *device)
@@ -432,8 +425,6 @@ void fw_card_initialize(struct fw_card *card,
        init_completion(&card->done);
        INIT_LIST_HEAD(&card->transaction_list);
        spin_lock_init(&card->lock);
-       setup_timer(&card->flush_timer,
-                   flush_timer_callback, (unsigned long)card);
 
        card->local_node = NULL;
 
@@ -558,7 +549,6 @@ void fw_core_remove_card(struct fw_card *card)
        wait_for_completion(&card->done);
 
        WARN_ON(!list_empty(&card->transaction_list));
-       del_timer_sync(&card->flush_timer);
 }
 EXPORT_SYMBOL(fw_core_remove_card);
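
With the card-wide flush timer gone, expiry handling moves to a per-transaction timer (added in the transaction-layer changes below). The timeout callback and the normal completion path then race for the same transaction; both take the card lock and check whether the entry is still on the transaction list, so exactly one side runs the callback. A mutex-and-flag reduction of that claim protocol:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

struct transaction {
	bool on_list;	/* stands in for !list_empty(&t->link) */
};

/* Returns true for whichever path (timeout or completion) claims the
 * transaction first; the loser must not touch the callback. */
static bool claim(struct transaction *t)
{
	bool won;

	pthread_mutex_lock(&lock);
	won = t->on_list;
	t->on_list = false;	/* analogue of list_del_init() */
	pthread_mutex_unlock(&lock);
	return won;
}

int main(void)
{
	struct transaction t = { .on_list = true };

	if (claim(&t))
		puts("completion ran the callback");
	if (!claim(&t))
		puts("timeout lost the race: callback skipped");
	return 0;
}
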
 
index 14a34d9..5bf106b 100644 (file)
@@ -227,7 +227,7 @@ static int fw_device_op_open(struct inode *inode, struct file *file)
        list_add_tail(&client->link, &device->client_list);
        mutex_unlock(&device->client_list_mutex);
 
-       return 0;
+       return nonseekable_open(inode, file);
 }
 
 static void queue_event(struct client *client, struct event *event,
@@ -1496,13 +1496,13 @@ static unsigned int fw_device_op_poll(struct file *file, poll_table * pt)
 
 const struct file_operations fw_device_ops = {
        .owner          = THIS_MODULE,
+       .llseek         = no_llseek,
        .open           = fw_device_op_open,
        .read           = fw_device_op_read,
        .unlocked_ioctl = fw_device_op_ioctl,
-       .poll           = fw_device_op_poll,
-       .release        = fw_device_op_release,
        .mmap           = fw_device_op_mmap,
-
+       .release        = fw_device_op_release,
+       .poll           = fw_device_op_poll,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = fw_device_op_compat_ioctl,
 #endif
index 673b03f..fdc33ff 100644 (file)
@@ -81,7 +81,7 @@ static int close_transaction(struct fw_transaction *transaction,
        spin_lock_irqsave(&card->lock, flags);
        list_for_each_entry(t, &card->transaction_list, link) {
                if (t == transaction) {
-                       list_del(&t->link);
+                       list_del_init(&t->link);
                        card->tlabel_mask &= ~(1ULL << t->tlabel);
                        break;
                }
@@ -89,6 +89,7 @@ static int close_transaction(struct fw_transaction *transaction,
        spin_unlock_irqrestore(&card->lock, flags);
 
        if (&t->link != &card->transaction_list) {
+               del_timer_sync(&t->split_timeout_timer);
                t->callback(card, rcode, NULL, 0, t->callback_data);
                return 0;
        }
@@ -121,6 +122,31 @@ int fw_cancel_transaction(struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_cancel_transaction);
 
+static void split_transaction_timeout_callback(unsigned long data)
+{
+       struct fw_transaction *t = (struct fw_transaction *)data;
+       struct fw_card *card = t->card;
+       unsigned long flags;
+
+       spin_lock_irqsave(&card->lock, flags);
+       if (list_empty(&t->link)) {
+               spin_unlock_irqrestore(&card->lock, flags);
+               return;
+       }
+       list_del(&t->link);
+       card->tlabel_mask &= ~(1ULL << t->tlabel);
+       spin_unlock_irqrestore(&card->lock, flags);
+
+       card->driver->cancel_packet(card, &t->packet);
+
+       /*
+        * At this point cancel_packet will never call the transaction
+        * callback, since we just took the transaction out of the list.
+        * So do it here.
+        */
+       t->callback(card, RCODE_CANCELLED, NULL, 0, t->callback_data);
+}
+
 static void transmit_complete_callback(struct fw_packet *packet,
                                       struct fw_card *card, int status)
 {
@@ -229,6 +255,23 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel,
        packet->payload_mapped = false;
 }
 
+static int allocate_tlabel(struct fw_card *card)
+{
+       int tlabel;
+
+       tlabel = card->current_tlabel;
+       while (card->tlabel_mask & (1ULL << tlabel)) {
+               tlabel = (tlabel + 1) & 0x3f;
+               if (tlabel == card->current_tlabel)
+                       return -EBUSY;
+       }
+
+       card->current_tlabel = (tlabel + 1) & 0x3f;
+       card->tlabel_mask |= 1ULL << tlabel;
+
+       return tlabel;
+}
+
 /**
  * This function provides low-level access to the IEEE1394 transaction
  * logic.  Most C programs would use either fw_read(), fw_write() or
@@ -277,31 +320,26 @@ void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode,
        int tlabel;
 
        /*
-        * Bump the flush timer up 100ms first of all so we
-        * don't race with a flush timer callback.
-        */
-
-       mod_timer(&card->flush_timer, jiffies + DIV_ROUND_UP(HZ, 10));
-
-       /*
         * Allocate tlabel from the bitmap and put the transaction on
         * the list while holding the card spinlock.
         */
 
        spin_lock_irqsave(&card->lock, flags);
 
-       tlabel = card->current_tlabel;
-       if (card->tlabel_mask & (1ULL << tlabel)) {
+       tlabel = allocate_tlabel(card);
+       if (tlabel < 0) {
                spin_unlock_irqrestore(&card->lock, flags);
                callback(card, RCODE_SEND_ERROR, NULL, 0, callback_data);
                return;
        }
 
-       card->current_tlabel = (card->current_tlabel + 1) & 0x3f;
-       card->tlabel_mask |= (1ULL << tlabel);
-
        t->node_id = destination_id;
        t->tlabel = tlabel;
+       t->card = card;
+       setup_timer(&t->split_timeout_timer,
+                   split_transaction_timeout_callback, (unsigned long)t);
+       /* FIXME: start this timer later, relative to t->timestamp */
+       mod_timer(&t->split_timeout_timer, jiffies + DIV_ROUND_UP(HZ, 10));
        t->callback = callback;
        t->callback_data = callback_data;
 
@@ -347,11 +385,13 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
        struct transaction_callback_data d;
        struct fw_transaction t;
 
+       init_timer_on_stack(&t.split_timeout_timer);
        init_completion(&d.done);
        d.payload = payload;
        fw_send_request(card, &t, tcode, destination_id, generation, speed,
                        offset, payload, length, transaction_callback, &d);
        wait_for_completion(&d.done);
+       destroy_timer_on_stack(&t.split_timeout_timer);
 
        return d.rcode;
 }
@@ -394,30 +434,6 @@ void fw_send_phy_config(struct fw_card *card,
        mutex_unlock(&phy_config_mutex);
 }
 
-void fw_flush_transactions(struct fw_card *card)
-{
-       struct fw_transaction *t, *next;
-       struct list_head list;
-       unsigned long flags;
-
-       INIT_LIST_HEAD(&list);
-       spin_lock_irqsave(&card->lock, flags);
-       list_splice_init(&card->transaction_list, &list);
-       card->tlabel_mask = 0;
-       spin_unlock_irqrestore(&card->lock, flags);
-
-       list_for_each_entry_safe(t, next, &list, link) {
-               card->driver->cancel_packet(card, &t->packet);
-
-               /*
-                * At this point cancel_packet will never call the
-                * transaction callback, since we just took all the
-                * transactions out of the list.  So do it here.
-                */
-               t->callback(card, RCODE_CANCELLED, NULL, 0, t->callback_data);
-       }
-}
-
 static struct fw_address_handler *lookup_overlapping_address_handler(
        struct list_head *list, unsigned long long offset, size_t length)
 {
@@ -827,8 +843,8 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
        spin_lock_irqsave(&card->lock, flags);
        list_for_each_entry(t, &card->transaction_list, link) {
                if (t->node_id == source && t->tlabel == tlabel) {
-                       list_del(&t->link);
-                       card->tlabel_mask &= ~(1 << t->tlabel);
+                       list_del_init(&t->link);
+                       card->tlabel_mask &= ~(1ULL << t->tlabel);
                        break;
                }
        }
@@ -869,6 +885,8 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
                break;
        }
 
+       del_timer_sync(&t->split_timeout_timer);
+
        /*
         * The response handler may be executed while the request handler
         * is still pending.  Cancel the request handler.
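
The new allocate_tlabel() helper above is a circular scan over a 64-bit busy bitmap: start at the position after the last allocation, skip set bits, and report -EBUSY only after coming back to the starting point with all 64 labels in flight. A self-contained sketch:

#include <stdint.h>
#include <stdio.h>

#define EBUSY_ERR (-16)		/* stand-in for -EBUSY */

static uint64_t tlabel_mask;	/* bit n set: label n is in flight */
static int current_tlabel;	/* next position to try */

static int allocate_tlabel(void)
{
	int tlabel = current_tlabel;

	/* Circular scan: give up if we come back to where we started. */
	while (tlabel_mask & (UINT64_C(1) << tlabel)) {
		tlabel = (tlabel + 1) & 0x3f;
		if (tlabel == current_tlabel)
			return EBUSY_ERR;	/* all 64 labels busy */
	}
	current_tlabel = (tlabel + 1) & 0x3f;
	tlabel_mask |= UINT64_C(1) << tlabel;
	return tlabel;
}

int main(void)
{
	for (int i = 0; i < 65; i++) {
		int t = allocate_tlabel();

		if (t < 0) {
			printf("allocation %d: all labels busy\n", i);
			break;
		}
	}
	return 0;
}
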
index fb03213..0ecfcd9 100644 (file)
@@ -27,7 +27,12 @@ struct fw_packet;
 #define PHY_LINK_ACTIVE                0x80
 #define PHY_CONTENDER          0x40
 #define PHY_BUS_RESET          0x40
+#define PHY_EXTENDED_REGISTERS 0xe0
 #define PHY_BUS_SHORT_RESET    0x40
+#define PHY_INT_STATUS_BITS    0x3c
+#define PHY_ENABLE_ACCEL       0x02
+#define PHY_ENABLE_MULTI       0x01
+#define PHY_PAGE_SELECT                0xe0
 
 #define BANDWIDTH_AVAILABLE_INITIAL    4915
 #define BROADCAST_CHANNEL_INITIAL      (1 << 31 | 31)
@@ -215,7 +220,6 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *request);
 void fw_core_handle_response(struct fw_card *card, struct fw_packet *packet);
 void fw_fill_response(struct fw_packet *response, u32 *request_header,
                      int rcode, void *payload, size_t length);
-void fw_flush_transactions(struct fw_card *card);
 void fw_send_phy_config(struct fw_card *card,
                        int node_id, int generation, int gap_count);
 
index a3b083a..9f627e7 100644 (file)
@@ -236,13 +236,15 @@ static char ohci_driver_name[] = KBUILD_MODNAME;
 #define QUIRK_CYCLE_TIMER              1
 #define QUIRK_RESET_PACKET             2
 #define QUIRK_BE_HEADERS               4
+#define QUIRK_NO_1394A                 8
 
 /* In case of multiple matches in ohci_quirks[], only the first one is used. */
 static const struct {
        unsigned short vendor, device, flags;
 } ohci_quirks[] = {
        {PCI_VENDOR_ID_TI,      PCI_DEVICE_ID_TI_TSB12LV22, QUIRK_CYCLE_TIMER |
-                                                           QUIRK_RESET_PACKET},
+                                                           QUIRK_RESET_PACKET |
+                                                           QUIRK_NO_1394A},
        {PCI_VENDOR_ID_TI,      PCI_ANY_ID,     QUIRK_RESET_PACKET},
        {PCI_VENDOR_ID_AL,      PCI_ANY_ID,     QUIRK_CYCLE_TIMER},
        {PCI_VENDOR_ID_NEC,     PCI_ANY_ID,     QUIRK_CYCLE_TIMER},
@@ -257,15 +259,16 @@ MODULE_PARM_DESC(quirks, "Chip quirks (default = 0"
        ", nonatomic cycle timer = "    __stringify(QUIRK_CYCLE_TIMER)
        ", reset packet generation = "  __stringify(QUIRK_RESET_PACKET)
        ", AR/selfID endianess = "      __stringify(QUIRK_BE_HEADERS)
+       ", no 1394a enhancements = "    __stringify(QUIRK_NO_1394A)
        ")");
 
-#ifdef CONFIG_FIREWIRE_OHCI_DEBUG
-
 #define OHCI_PARAM_DEBUG_AT_AR         1
 #define OHCI_PARAM_DEBUG_SELFIDS       2
 #define OHCI_PARAM_DEBUG_IRQS          4
 #define OHCI_PARAM_DEBUG_BUSRESETS     8 /* only effective before chip init */
 
+#ifdef CONFIG_FIREWIRE_OHCI_DEBUG
+
 static int param_debug;
 module_param_named(debug, param_debug, int, 0644);
 MODULE_PARM_DESC(debug, "Verbose logging (default = 0"
@@ -438,9 +441,10 @@ static void log_ar_at_event(char dir, int speed, u32 *header, int evt)
 
 #else
 
-#define log_irqs(evt)
-#define log_selfids(node_id, generation, self_id_count, sid)
-#define log_ar_at_event(dir, speed, header, evt)
+#define param_debug 0
+static inline void log_irqs(u32 evt) {}
+static inline void log_selfids(int node_id, int generation, int self_id_count, u32 *s) {}
+static inline void log_ar_at_event(char dir, int speed, u32 *header, int evt) {}
 
 #endif /* CONFIG_FIREWIRE_OHCI_DEBUG */
 
@@ -460,27 +464,71 @@ static inline void flush_writes(const struct fw_ohci *ohci)
        reg_read(ohci, OHCI1394_Version);
 }
 
-static int ohci_update_phy_reg(struct fw_card *card, int addr,
-                              int clear_bits, int set_bits)
+static int read_phy_reg(struct fw_ohci *ohci, int addr)
 {
-       struct fw_ohci *ohci = fw_ohci(card);
-       u32 val, old;
+       u32 val;
+       int i;
 
        reg_write(ohci, OHCI1394_PhyControl, OHCI1394_PhyControl_Read(addr));
-       flush_writes(ohci);
-       msleep(2);
-       val = reg_read(ohci, OHCI1394_PhyControl);
-       if ((val & OHCI1394_PhyControl_ReadDone) == 0) {
-               fw_error("failed to set phy reg bits.\n");
-               return -EBUSY;
+       for (i = 0; i < 10; i++) {
+               val = reg_read(ohci, OHCI1394_PhyControl);
+               if (val & OHCI1394_PhyControl_ReadDone)
+                       return OHCI1394_PhyControl_ReadData(val);
+
+               msleep(1);
        }
+       fw_error("failed to read phy reg\n");
+
+       return -EBUSY;
+}
+
+static int write_phy_reg(const struct fw_ohci *ohci, int addr, u32 val)
+{
+       int i;
 
-       old = OHCI1394_PhyControl_ReadData(val);
-       old = (old & ~clear_bits) | set_bits;
        reg_write(ohci, OHCI1394_PhyControl,
-                 OHCI1394_PhyControl_Write(addr, old));
+                 OHCI1394_PhyControl_Write(addr, val));
+       for (i = 0; i < 100; i++) {
+               val = reg_read(ohci, OHCI1394_PhyControl);
+               if (!(val & OHCI1394_PhyControl_WritePending))
+                       return 0;
 
-       return 0;
+               msleep(1);
+       }
+       fw_error("failed to write phy reg\n");
+
+       return -EBUSY;
+}
+
+static int ohci_update_phy_reg(struct fw_card *card, int addr,
+                              int clear_bits, int set_bits)
+{
+       struct fw_ohci *ohci = fw_ohci(card);
+       int ret;
+
+       ret = read_phy_reg(ohci, addr);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * The interrupt status bits are cleared by writing a one bit.
+        * Avoid clearing them unless explicitly requested in set_bits.
+        */
+       if (addr == 5)
+               clear_bits |= PHY_INT_STATUS_BITS;
+
+       return write_phy_reg(ohci, addr, (ret & ~clear_bits) | set_bits);
+}
+
+static int read_paged_phy_reg(struct fw_ohci *ohci, int page, int addr)
+{
+       int ret;
+
+       ret = ohci_update_phy_reg(&ohci->card, 7, PHY_PAGE_SELECT, page << 5);
+       if (ret < 0)
+               return ret;
+
+       return read_phy_reg(ohci, addr);
 }
 
 static int ar_context_add_page(struct ar_context *ctx)
@@ -1495,13 +1543,64 @@ static void copy_config_rom(__be32 *dest, const __be32 *src, size_t length)
                memset(&dest[length], 0, CONFIG_ROM_SIZE - size);
 }
 
+static int configure_1394a_enhancements(struct fw_ohci *ohci)
+{
+       bool enable_1394a;
+       int ret, clear, set, offset;
+
+       /* Check if the driver should configure link and PHY. */
+       if (!(reg_read(ohci, OHCI1394_HCControlSet) &
+             OHCI1394_HCControl_programPhyEnable))
+               return 0;
+
+       /* Paranoia: check whether the PHY supports 1394a, too. */
+       enable_1394a = false;
+       ret = read_phy_reg(ohci, 2);
+       if (ret < 0)
+               return ret;
+       if ((ret & PHY_EXTENDED_REGISTERS) == PHY_EXTENDED_REGISTERS) {
+               ret = read_paged_phy_reg(ohci, 1, 8);
+               if (ret < 0)
+                       return ret;
+               if (ret >= 1)
+                       enable_1394a = true;
+       }
+
+       if (ohci->quirks & QUIRK_NO_1394A)
+               enable_1394a = false;
+
+       /* Configure PHY and link consistently. */
+       if (enable_1394a) {
+               clear = 0;
+               set = PHY_ENABLE_ACCEL | PHY_ENABLE_MULTI;
+       } else {
+               clear = PHY_ENABLE_ACCEL | PHY_ENABLE_MULTI;
+               set = 0;
+       }
+       ret = ohci_update_phy_reg(&ohci->card, 5, clear, set);
+       if (ret < 0)
+               return ret;
+
+       if (enable_1394a)
+               offset = OHCI1394_HCControlSet;
+       else
+               offset = OHCI1394_HCControlClear;
+       reg_write(ohci, offset, OHCI1394_HCControl_aPhyEnhanceEnable);
+
+       /* Clean up: configuration has been taken care of. */
+       reg_write(ohci, OHCI1394_HCControlClear,
+                 OHCI1394_HCControl_programPhyEnable);
+
+       return 0;
+}
+
 static int ohci_enable(struct fw_card *card,
                       const __be32 *config_rom, size_t length)
 {
        struct fw_ohci *ohci = fw_ohci(card);
        struct pci_dev *dev = to_pci_dev(card->device);
        u32 lps;
-       int i;
+       int i, ret;
 
        if (software_reset(ohci)) {
                fw_error("Failed to reset ohci card.\n");
@@ -1565,10 +1664,14 @@ static int ohci_enable(struct fw_card *card,
        if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS)
                reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
 
+       ret = configure_1394a_enhancements(ohci);
+       if (ret < 0)
+               return ret;
+
        /* Activate link_on bit and contender bit in our self ID packets.*/
-       if (ohci_update_phy_reg(card, 4, 0,
-                               PHY_LINK_ACTIVE | PHY_CONTENDER) < 0)
-               return -EIO;
+       ret = ohci_update_phy_reg(card, 4, 0, PHY_LINK_ACTIVE | PHY_CONTENDER);
+       if (ret < 0)
+               return ret;
 
        /*
         * When the link is not yet enabled, the atomic config rom
@@ -2304,7 +2407,7 @@ static const struct fw_card_driver ohci_driver = {
 };
 
 #ifdef CONFIG_PPC_PMAC
-static void ohci_pmac_on(struct pci_dev *dev)
+static void pmac_ohci_on(struct pci_dev *dev)
 {
        if (machine_is(powermac)) {
                struct device_node *ofn = pci_device_to_OF_node(dev);
@@ -2316,7 +2419,7 @@ static void ohci_pmac_on(struct pci_dev *dev)
        }
 }
 
-static void ohci_pmac_off(struct pci_dev *dev)
+static void pmac_ohci_off(struct pci_dev *dev)
 {
        if (machine_is(powermac)) {
                struct device_node *ofn = pci_device_to_OF_node(dev);
@@ -2328,15 +2431,15 @@ static void ohci_pmac_off(struct pci_dev *dev)
        }
 }
 #else
-#define ohci_pmac_on(dev)
-#define ohci_pmac_off(dev)
+static inline void pmac_ohci_on(struct pci_dev *dev) {}
+static inline void pmac_ohci_off(struct pci_dev *dev) {}
 #endif /* CONFIG_PPC_PMAC */
 
 static int __devinit pci_probe(struct pci_dev *dev,
                               const struct pci_device_id *ent)
 {
        struct fw_ohci *ohci;
-       u32 bus_options, max_receive, link_speed, version;
+       u32 bus_options, max_receive, link_speed, version, link_enh;
        u64 guid;
        int i, err, n_ir, n_it;
        size_t size;
@@ -2349,7 +2452,7 @@ static int __devinit pci_probe(struct pci_dev *dev,
 
        fw_card_initialize(&ohci->card, &ohci_driver, &dev->dev);
 
-       ohci_pmac_on(dev);
+       pmac_ohci_on(dev);
 
        err = pci_enable_device(dev);
        if (err) {
@@ -2389,6 +2492,23 @@ static int __devinit pci_probe(struct pci_dev *dev,
        if (param_quirks)
                ohci->quirks = param_quirks;
 
+       /* TI OHCI-Lynx and compatible: set recommended configuration bits. */
+       if (dev->vendor == PCI_VENDOR_ID_TI) {
+               pci_read_config_dword(dev, PCI_CFG_TI_LinkEnh, &link_enh);
+
+               /* adjust latency of ATx FIFO: use 1.7 KB threshold */
+               link_enh &= ~TI_LinkEnh_atx_thresh_mask;
+               link_enh |= TI_LinkEnh_atx_thresh_1_7K;
+
+               /* use priority arbitration for asynchronous responses */
+               link_enh |= TI_LinkEnh_enab_unfair;
+
+               /* required for aPhyEnhanceEnable to work */
+               link_enh |= TI_LinkEnh_enab_accel;
+
+               pci_write_config_dword(dev, PCI_CFG_TI_LinkEnh, link_enh);
+       }
+
        ar_context_init(&ohci->ar_request_ctx, ohci,
                        OHCI1394_AsReqRcvContextControlSet);
 
@@ -2466,7 +2586,7 @@ static int __devinit pci_probe(struct pci_dev *dev,
        pci_disable_device(dev);
  fail_free:
        kfree(&ohci->card);
-       ohci_pmac_off(dev);
+       pmac_ohci_off(dev);
  fail:
        if (err == -ENOMEM)
                fw_error("Out of memory\n");
@@ -2509,7 +2629,7 @@ static void pci_remove(struct pci_dev *dev)
        pci_release_region(dev, 0);
        pci_disable_device(dev);
        kfree(&ohci->card);
-       ohci_pmac_off(dev);
+       pmac_ohci_off(dev);
 
        fw_notify("Removed fw-ohci device.\n");
 }
@@ -2530,7 +2650,7 @@ static int pci_suspend(struct pci_dev *dev, pm_message_t state)
        err = pci_set_power_state(dev, pci_choose_state(dev, state));
        if (err)
                fw_error("pci_set_power_state failed with %d\n", err);
-       ohci_pmac_off(dev);
+       pmac_ohci_off(dev);
 
        return 0;
 }
@@ -2540,7 +2660,7 @@ static int pci_resume(struct pci_dev *dev)
        struct fw_ohci *ohci = pci_get_drvdata(dev);
        int err;
 
-       ohci_pmac_on(dev);
+       pmac_ohci_on(dev);
        pci_set_power_state(dev, PCI_D0);
        pci_restore_state(dev);
        err = pci_enable_device(dev);
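
Several of the ohci.c changes above share one pattern: read_phy_reg() and write_phy_reg() replace a single fixed msleep(2) with a bounded poll that re-checks a status bit, sleeps briefly between attempts, and fails with -EBUSY rather than spinning forever. A minimal standalone sketch of that loop (the simulated hardware reports done on the third poll):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define EBUSY_ERR 16	/* stand-in for the kernel's EBUSY */

/* Simulated hardware: reports "done" on the third poll. */
static int polls_needed = 3;
static bool hw_done(void)
{
	return --polls_needed <= 0;
}

/* Bounded poll, mirroring read_phy_reg()/write_phy_reg(): re-check up
 * to `attempts` times, sleep ~1 ms between checks, and give up with an
 * error instead of looping indefinitely. */
static int wait_for_hw(int attempts)
{
	for (int i = 0; i < attempts; i++) {
		if (hw_done())
			return 0;
		usleep(1000);	/* analogue of msleep(1) */
	}
	return -EBUSY_ERR;
}

int main(void)
{
	printf("wait_for_hw: %d\n", wait_for_hw(10));	/* 0: done on poll 3 */
	return 0;
}
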
index ba492d8..3bc9a5d 100644 (file)
@@ -67,7 +67,7 @@
 #define   OHCI1394_PhyControl_ReadDone         0x80000000
 #define   OHCI1394_PhyControl_ReadData(r)      (((r) & 0x00ff0000) >> 16)
 #define   OHCI1394_PhyControl_Write(addr, data)        (((addr) << 8) | (data) | 0x00004000)
-#define   OHCI1394_PhyControl_WriteDone                0x00004000
+#define   OHCI1394_PhyControl_WritePending     0x00004000
 #define OHCI1394_IsochronousCycleTimer        0x0F0
 #define OHCI1394_AsReqFilterHiSet             0x100
 #define OHCI1394_AsReqFilterHiClear           0x104
 
 #define OHCI1394_phy_tcode             0xe
 
+/* TI extensions */
+
+#define PCI_CFG_TI_LinkEnh             0xf4
+#define  TI_LinkEnh_enab_accel         0x00000002
+#define  TI_LinkEnh_enab_unfair                0x00000080
+#define  TI_LinkEnh_atx_thresh_mask    0x00003000
+#define  TI_LinkEnh_atx_thresh_1_7K    0x00001000
+
 #endif /* _FIREWIRE_OHCI_H */
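
pci_probe() consumes these TI link-enhancement bits as a classic read-modify-write: read the config dword, clear the ATx threshold field with its mask, then OR in the new threshold and the enable bits before writing it back. The bit arithmetic, reduced to plain C over an illustrative starting value:

#include <stdint.h>
#include <stdio.h>

#define ATX_THRESH_MASK  0x00003000u
#define ATX_THRESH_1_7K  0x00001000u
#define ENAB_UNFAIR      0x00000080u
#define ENAB_ACCEL       0x00000002u

int main(void)
{
	uint32_t link_enh = 0x00002001u;	/* illustrative current value */

	link_enh &= ~ATX_THRESH_MASK;	/* clear the whole threshold field */
	link_enh |= ATX_THRESH_1_7K;	/* then set the 1.7 KB threshold */
	link_enh |= ENAB_UNFAIR | ENAB_ACCEL;
	printf("0x%08x\n", (unsigned)link_enh);	/* 0x00001083 */
	return 0;
}
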
index fee678f..4fd0f27 100644 (file)
@@ -139,6 +139,13 @@ config GPIO_MAX732X
          Board setup code must specify the model to use, and the start
          number for these GPIOs.
 
+config GPIO_MAX732X_IRQ
+       bool "Interrupt controller support for MAX732x"
+       depends on GPIO_MAX732X=y && GENERIC_HARDIRQS
+       help
+         Say yes here to enable the max732x to be used as an interrupt
+         controller. It requires the driver to be built into the kernel.
+
 config GPIO_PCA953X
        tristate "PCA953x, PCA955x, TCA64xx, and MAX7310 I/O ports"
        depends on I2C
@@ -264,10 +271,10 @@ config GPIO_BT8XX
          If unsure, say N.
 
 config GPIO_LANGWELL
-       bool "Intel Moorestown Platform Langwell GPIO support"
+       bool "Intel Langwell/Penwell GPIO support"
        depends on PCI
        help
-         Say Y here to support Intel Moorestown platform GPIO.
+         Say Y here to support Intel Langwell/Penwell GPIO.
 
 config GPIO_TIMBERDALE
        bool "Support for timberdale GPIO IP"
index 0c3c498..f73a155 100644 (file)
@@ -197,7 +197,7 @@ static int chip_direction_output(struct gpio_chip *c, unsigned offset, int val)
        return 0;
 }
 
-static char *cs5535_gpio_names[] = {
+static const char * const cs5535_gpio_names[] = {
        "GPIO0", "GPIO1", "GPIO2", "GPIO3",
        "GPIO4", "GPIO5", "GPIO6", "GPIO7",
        "GPIO8", "GPIO9", "GPIO10", "GPIO11",
index cae1b8c..3ca3654 100644 (file)
@@ -722,7 +722,7 @@ int gpio_export(unsigned gpio, bool direction_may_change)
        unsigned long           flags;
        struct gpio_desc        *desc;
        int                     status = -EINVAL;
-       char                    *ioname = NULL;
+       const char              *ioname = NULL;
 
        /* can't export until sysfs is available ... */
        if (!gpio_class.p) {
@@ -753,7 +753,7 @@ int gpio_export(unsigned gpio, bool direction_may_change)
                struct device   *dev;
 
                dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
-                               desc, ioname ? ioname : "gpio%d", gpio);
+                               desc, ioname ? ioname : "gpio%u", gpio);
                if (!IS_ERR(dev)) {
                        status = sysfs_create_group(&dev->kobj,
                                                &gpio_attr_group);
@@ -1106,7 +1106,7 @@ unlock:
 fail:
        /* failures here can mean systems won't boot... */
        if (status)
-               pr_err("gpiochip_add: gpios %d..%d (%s) not registered\n",
+               pr_err("gpiochip_add: gpios %d..%d (%s) failed to register\n",
                        chip->base, chip->base + chip->ngpio - 1,
                        chip->label ? : "generic");
        return status;
@@ -1447,6 +1447,49 @@ fail:
 }
 EXPORT_SYMBOL_GPL(gpio_direction_output);
 
+/**
+ * gpio_set_debounce - sets @debounce time for a @gpio
+ * @gpio: the gpio to set debounce time
+ * @debounce: debounce time in microseconds
+ */
+int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+       unsigned long           flags;
+       struct gpio_chip        *chip;
+       struct gpio_desc        *desc = &gpio_desc[gpio];
+       int                     status = -EINVAL;
+
+       spin_lock_irqsave(&gpio_lock, flags);
+
+       if (!gpio_is_valid(gpio))
+               goto fail;
+       chip = desc->chip;
+       if (!chip || !chip->set || !chip->set_debounce)
+               goto fail;
+       gpio -= chip->base;
+       if (gpio >= chip->ngpio)
+               goto fail;
+       status = gpio_ensure_requested(desc, gpio);
+       if (status < 0)
+               goto fail;
+
+       /* now we know the gpio is valid and chip won't vanish */
+
+       spin_unlock_irqrestore(&gpio_lock, flags);
+
+       might_sleep_if(extra_checks && chip->can_sleep);
+
+       return chip->set_debounce(chip, gpio, debounce);
+
+fail:
+       spin_unlock_irqrestore(&gpio_lock, flags);
+       if (status)
+               pr_debug("%s: gpio-%d status %d\n",
+                       __func__, gpio, status);
+
+       return status;
+}
+EXPORT_SYMBOL_GPL(gpio_set_debounce);
 
 /* I/O calls are only valid after configuration completed; the relevant
  * "is this a valid GPIO" error checks should already have been done.
index 41a9388..48fc43c 100644 (file)
@@ -217,7 +217,10 @@ gpiochip_add_err:
 static void __exit it8761e_gpio_exit(void)
 {
        if (gpio_ba) {
-               gpiochip_remove(&it8761e_gpio_chip);
+               int ret = gpiochip_remove(&it8761e_gpio_chip);
+
+               WARN(ret, "%s(): gpiochip_remove() failed, ret=%d\n",
+                               __func__, ret);
 
                release_region(gpio_ba, GPIO_IOSIZE);
                gpio_ba = 0;
index 00c3a14..8383a8d 100644 (file)
@@ -17,6 +17,7 @@
 
 /* Supports:
  * Moorestown platform Langwell chip.
+ * Medfield platform Penwell chip.
  */
 
 #include <linux/module.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
 
-struct lnw_gpio_register {
-       u32     GPLR[2];
-       u32     GPDR[2];
-       u32     GPSR[2];
-       u32     GPCR[2];
-       u32     GRER[2];
-       u32     GFER[2];
-       u32     GEDR[2];
+/*
+ * The Langwell chip has 64 pins, so there are two 32-bit registers to
+ * control each feature, while the Penwell chip has 96 pins per block and
+ * needs three 32-bit registers.  We therefore only define the register
+ * order here instead of a structure.  The bit offset for a pin is found
+ * as follows (using GPDR as an example):
+ *
+ * nreg = ngpio / 32;
+ * reg = offset / 32;
+ * bit = offset % 32;
+ * reg_addr = reg_base + GPDR * nreg * 4 + reg * 4;
+ *
+ * so bit `bit' of the register at reg_addr controls the GPDR feature of
+ * the pin at `offset'.
+ */
+
+enum GPIO_REG {
+       GPLR = 0,       /* pin level read-only */
+       GPDR,           /* pin direction */
+       GPSR,           /* pin set */
+       GPCR,           /* pin clear */
+       GRER,           /* rising edge detect */
+       GFER,           /* falling edge detect */
+       GEDR,           /* edge detect result */
 };
 
 struct lnw_gpio {
        struct gpio_chip                chip;
-       struct lnw_gpio_register        *reg_base;
+       void                            *reg_base;
        spinlock_t                      lock;
        unsigned                        irq_base;
 };
 
-static int lnw_gpio_get(struct gpio_chip *chip, unsigned offset)
+static void __iomem *gpio_reg(struct gpio_chip *chip, unsigned offset,
+                       enum GPIO_REG reg_type)
 {
        struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
+       unsigned nreg = chip->ngpio / 32;
        u8 reg = offset / 32;
-       void __iomem *gplr;
+       void __iomem *ptr;
+
+       ptr = (void __iomem *)(lnw->reg_base + reg_type * nreg * 4 + reg * 4);
+       return ptr;
+}
+
+static int lnw_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+       void __iomem *gplr = gpio_reg(chip, offset, GPLR);
 
-       gplr = (void __iomem *)(&lnw->reg_base->GPLR[reg]);
        return readl(gplr) & BIT(offset % 32);
 }
 
 static void lnw_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
-       struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
-       u8 reg = offset / 32;
        void __iomem *gpsr, *gpcr;
 
        if (value) {
-               gpsr = (void __iomem *)(&lnw->reg_base->GPSR[reg]);
+               gpsr = gpio_reg(chip, offset, GPSR);
                writel(BIT(offset % 32), gpsr);
        } else {
-               gpcr = (void __iomem *)(&lnw->reg_base->GPCR[reg]);
+               gpcr = gpio_reg(chip, offset, GPCR);
                writel(BIT(offset % 32), gpcr);
        }
 }
@@ -76,12 +98,10 @@ static void lnw_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 static int lnw_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
        struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
-       u8 reg = offset / 32;
+       void __iomem *gpdr = gpio_reg(chip, offset, GPDR);
        u32 value;
        unsigned long flags;
-       void __iomem *gpdr;
 
-       gpdr = (void __iomem *)(&lnw->reg_base->GPDR[reg]);
        spin_lock_irqsave(&lnw->lock, flags);
        value = readl(gpdr);
        value &= ~BIT(offset % 32);
@@ -94,12 +114,10 @@ static int lnw_gpio_direction_output(struct gpio_chip *chip,
                        unsigned offset, int value)
 {
        struct lnw_gpio *lnw = container_of(chip, struct lnw_gpio, chip);
-       u8 reg = offset / 32;
+       void __iomem *gpdr = gpio_reg(chip, offset, GPDR);
        unsigned long flags;
-       void __iomem *gpdr;
 
        lnw_gpio_set(chip, offset, value);
-       gpdr = (void __iomem *)(&lnw->reg_base->GPDR[reg]);
        spin_lock_irqsave(&lnw->lock, flags);
        value = readl(gpdr);
        value |= BIT(offset % 32);
@@ -118,11 +136,10 @@ static int lnw_irq_type(unsigned irq, unsigned type)
 {
        struct lnw_gpio *lnw = get_irq_chip_data(irq);
        u32 gpio = irq - lnw->irq_base;
-       u8 reg = gpio / 32;
        unsigned long flags;
        u32 value;
-       void __iomem *grer = (void __iomem *)(&lnw->reg_base->GRER[reg]);
-       void __iomem *gfer = (void __iomem *)(&lnw->reg_base->GFER[reg]);
+       void __iomem *grer = gpio_reg(&lnw->chip, gpio, GRER);
+       void __iomem *gfer = gpio_reg(&lnw->chip, gpio, GFER);
 
        if (gpio >= lnw->chip.ngpio)
                return -EINVAL;
@@ -158,8 +175,10 @@ static struct irq_chip lnw_irqchip = {
        .set_type       = lnw_irq_type,
 };
 
-static struct pci_device_id lnw_gpio_ids[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080f) },
+static DEFINE_PCI_DEVICE_TABLE(lnw_gpio_ids) = {   /* pin number */
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x080f), .driver_data = 64 },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081f), .driver_data = 96 },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081a), .driver_data = 96 },
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, lnw_gpio_ids);
@@ -167,17 +186,17 @@ MODULE_DEVICE_TABLE(pci, lnw_gpio_ids);
 static void lnw_irq_handler(unsigned irq, struct irq_desc *desc)
 {
        struct lnw_gpio *lnw = (struct lnw_gpio *)get_irq_data(irq);
-       u32 reg, gpio;
+       u32 base, gpio;
        void __iomem *gedr;
        u32 gedr_v;
 
        /* check GPIO controller to check which pin triggered the interrupt */
-       for (reg = 0; reg < lnw->chip.ngpio / 32; reg++) {
-               gedr = (void __iomem *)(&lnw->reg_base->GEDR[reg]);
+       for (base = 0; base < lnw->chip.ngpio; base += 32) {
+               gedr = gpio_reg(&lnw->chip, base, GEDR);
                gedr_v = readl(gedr);
                if (!gedr_v)
                        continue;
-               for (gpio = reg*32; gpio < reg*32+32; gpio++)
+               for (gpio = base; gpio < base + 32; gpio++)
                        if (gedr_v & BIT(gpio % 32)) {
                                pr_debug("pin %d triggered\n", gpio);
                                generic_handle_irq(lnw->irq_base + gpio);
@@ -245,7 +264,7 @@ static int __devinit lnw_gpio_probe(struct pci_dev *pdev,
        lnw->chip.set = lnw_gpio_set;
        lnw->chip.to_irq = lnw_gpio_to_irq;
        lnw->chip.base = gpio_base;
-       lnw->chip.ngpio = 64;
+       lnw->chip.ngpio = id->driver_data;
        lnw->chip.can_sleep = 0;
        pci_set_drvdata(pdev, lnw);
        retval = gpiochip_add(&lnw->chip);
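
To make the register math above concrete, here is a worked example (numbers only, nothing beyond the formula in the comment): on a 96-pin Penwell block, pin offset 70 and the GPDR feature (GPDR == 1 in the enum) give

	nreg     = 96 / 32 = 3;
	reg      = 70 / 32 = 2;
	reg_addr = reg_base + 1 * 3 * 4 + 2 * 4;	/* reg_base + 20 */
	bit      = 70 % 32;				/* bit 6 */

so gpio_reg() returns reg_base + 20, and bit 6 of that register is pin 70's direction bit.
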
index f786824..9cad60f 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/gpio.h>
-
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/i2c.h>
 #include <linux/i2c/max732x.h>
 
@@ -31,7 +32,8 @@
  *   - Open Drain I/O
  *
  * designated by 'O', 'I' and 'P' individually according to MAXIM's
- * datasheets.
+ * datasheets. 'I' and 'P' ports are interrupt capable, some with
+ * a dedicated interrupt mask.
  *
  * There are two groups of I/O ports, each group usually includes
  * up to 8 I/O ports, and is accessed by a specific I2C address:
@@ -44,7 +46,8 @@
  *
  * Within each group of ports, there are five known combinations of
  * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
- * the detailed organization of these ports.
+ * the detailed organization of these ports. Only Group A is interrupt
+ * capable.
  *
  * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16',
  * and GPIOs from GROUP_A are numbered before those from GROUP_B
 #define GROUP_A(x)     ((x) & 0xffff)  /* I2C Addr: 0b'110xxxx */
 #define GROUP_B(x)     ((x) << 16)     /* I2C Addr: 0b'101xxxx */
 
+#define INT_NONE       0x0     /* No interrupt capability */
+#define INT_NO_MASK    0x1     /* Has interrupts, no mask */
+#define INT_INDEP_MASK 0x2     /* Has interrupts, independent mask */
+#define INT_MERGED_MASK 0x3    /* Has interrupts, merged mask */
+
+#define INT_CAPS(x)    (((uint64_t)(x)) << 32)
+
+enum {
+       MAX7319,
+       MAX7320,
+       MAX7321,
+       MAX7322,
+       MAX7323,
+       MAX7324,
+       MAX7325,
+       MAX7326,
+       MAX7327,
+};
+
+static uint64_t max732x_features[] = {
+       [MAX7319] = GROUP_A(IO_8I) | INT_CAPS(INT_MERGED_MASK),
+       [MAX7320] = GROUP_B(IO_8O),
+       [MAX7321] = GROUP_A(IO_8P) | INT_CAPS(INT_NO_MASK),
+       [MAX7322] = GROUP_A(IO_4I4O) | INT_CAPS(INT_MERGED_MASK),
+       [MAX7323] = GROUP_A(IO_4P4O) | INT_CAPS(INT_INDEP_MASK),
+       [MAX7324] = GROUP_A(IO_8I) | GROUP_B(IO_8O) | INT_CAPS(INT_MERGED_MASK),
+       [MAX7325] = GROUP_A(IO_8P) | GROUP_B(IO_8O) | INT_CAPS(INT_NO_MASK),
+       [MAX7326] = GROUP_A(IO_4I4O) | GROUP_B(IO_8O) | INT_CAPS(INT_MERGED_MASK),
+       [MAX7327] = GROUP_A(IO_4P4O) | GROUP_B(IO_8O) | INT_CAPS(INT_NO_MASK),
+};
+
 static const struct i2c_device_id max732x_id[] = {
-       { "max7319", GROUP_A(IO_8I) },
-       { "max7320", GROUP_B(IO_8O) },
-       { "max7321", GROUP_A(IO_8P) },
-       { "max7322", GROUP_A(IO_4I4O) },
-       { "max7323", GROUP_A(IO_4P4O) },
-       { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
-       { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
-       { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
-       { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+       { "max7319", MAX7319 },
+       { "max7320", MAX7320 },
+       { "max7321", MAX7321 },
+       { "max7322", MAX7322 },
+       { "max7323", MAX7323 },
+       { "max7324", MAX7324 },
+       { "max7325", MAX7325 },
+       { "max7326", MAX7326 },
+       { "max7327", MAX7327 },
        { },
 };
 MODULE_DEVICE_TABLE(i2c, max732x_id);
@@ -96,9 +130,19 @@ struct max732x_chip {
 
        struct mutex    lock;
        uint8_t         reg_out[2];
+
+#ifdef CONFIG_GPIO_MAX732X_IRQ
+       struct mutex    irq_lock;
+       int             irq_base;
+       uint8_t         irq_mask;
+       uint8_t         irq_mask_cur;
+       uint8_t         irq_trig_raise;
+       uint8_t         irq_trig_fall;
+       uint8_t         irq_features;
+#endif
 };
 
-static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+static int max732x_writeb(struct max732x_chip *chip, int group_a, uint8_t val)
 {
        struct i2c_client *client;
        int ret;
@@ -113,7 +157,7 @@ static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
        return 0;
 }
 
-static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+static int max732x_readb(struct max732x_chip *chip, int group_a, uint8_t *val)
 {
        struct i2c_client *client;
        int ret;
@@ -142,7 +186,7 @@ static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
 
        chip = container_of(gc, struct max732x_chip, gpio_chip);
 
-       ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
+       ret = max732x_readb(chip, is_group_a(chip, off), &reg_val);
        if (ret < 0)
                return 0;
 
@@ -162,7 +206,7 @@ static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
        reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
        reg_out = (val) ? reg_out | mask : reg_out & ~mask;
 
-       ret = max732x_write(chip, is_group_a(chip, off), reg_out);
+       ret = max732x_writeb(chip, is_group_a(chip, off), reg_out);
        if (ret < 0)
                goto out;
 
@@ -188,6 +232,13 @@ static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
                return -EACCES;
        }
 
+       /*
+        * Open-drain pins must be set to high impedance (which is
+        * equivalent to output-high) to be turned into an input.
+        */
+       if (mask & chip->dir_output)
+               max732x_gpio_set_value(gc, off, 1);
+
        return 0;
 }
 
@@ -209,12 +260,278 @@ static int max732x_gpio_direction_output(struct gpio_chip *gc,
        return 0;
 }
 
+#ifdef CONFIG_GPIO_MAX732X_IRQ
+static int max732x_writew(struct max732x_chip *chip, uint16_t val)
+{
+       int ret;
+
+       val = cpu_to_le16(val);
+
+       ret = i2c_master_send(chip->client_group_a, (char *)&val, 2);
+       if (ret < 0) {
+               dev_err(&chip->client_group_a->dev, "failed writing\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int max732x_readw(struct max732x_chip *chip, uint16_t *val)
+{
+       int ret;
+
+       ret = i2c_master_recv(chip->client_group_a, (char *)val, 2);
+       if (ret < 0) {
+               dev_err(&chip->client_group_a->dev, "failed reading\n");
+               return ret;
+       }
+
+       *val = le16_to_cpu(*val);
+       return 0;
+}
+
+static void max732x_irq_update_mask(struct max732x_chip *chip)
+{
+       uint16_t msg;
+
+       if (chip->irq_mask == chip->irq_mask_cur)
+               return;
+
+       chip->irq_mask = chip->irq_mask_cur;
+
+       if (chip->irq_features == INT_NO_MASK)
+               return;
+
+       mutex_lock(&chip->lock);
+
+       switch (chip->irq_features) {
+       case INT_INDEP_MASK:
+               msg = (chip->irq_mask << 8) | chip->reg_out[0];
+               max732x_writew(chip, msg);
+               break;
+
+       case INT_MERGED_MASK:
+               msg = chip->irq_mask | chip->reg_out[0];
+               max732x_writeb(chip, 1, (uint8_t)msg);
+               break;
+       }
+
+       mutex_unlock(&chip->lock);
+}
+
+static int max732x_gpio_to_irq(struct gpio_chip *gc, unsigned off)
+{
+       struct max732x_chip *chip;
+
+       chip = container_of(gc, struct max732x_chip, gpio_chip);
+       return chip->irq_base + off;
+}
+
+static void max732x_irq_mask(unsigned int irq)
+{
+       struct max732x_chip *chip = get_irq_chip_data(irq);
+
+       chip->irq_mask_cur &= ~(1 << (irq - chip->irq_base));
+}
+
+static void max732x_irq_unmask(unsigned int irq)
+{
+       struct max732x_chip *chip = get_irq_chip_data(irq);
+
+       chip->irq_mask_cur |= 1 << (irq - chip->irq_base);
+}
+
+static void max732x_irq_bus_lock(unsigned int irq)
+{
+       struct max732x_chip *chip = get_irq_chip_data(irq);
+
+       mutex_lock(&chip->irq_lock);
+       chip->irq_mask_cur = chip->irq_mask;
+}
+
+static void max732x_irq_bus_sync_unlock(unsigned int irq)
+{
+       struct max732x_chip *chip = get_irq_chip_data(irq);
+
+       max732x_irq_update_mask(chip);
+       mutex_unlock(&chip->irq_lock);
+}
+
+static int max732x_irq_set_type(unsigned int irq, unsigned int type)
+{
+       struct max732x_chip *chip = get_irq_chip_data(irq);
+       uint16_t off = irq - chip->irq_base;
+       uint16_t mask = 1 << off;
+
+       if (!(mask & chip->dir_input)) {
+               dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+                       chip->client->name, off);
+               return -EACCES;
+       }
+
+       if (!(type & IRQ_TYPE_EDGE_BOTH)) {
+               dev_err(&chip->client->dev, "irq %d: unsupported type %d\n",
+                       irq, type);
+               return -EINVAL;
+       }
+
+       if (type & IRQ_TYPE_EDGE_FALLING)
+               chip->irq_trig_fall |= mask;
+       else
+               chip->irq_trig_fall &= ~mask;
+
+       if (type & IRQ_TYPE_EDGE_RISING)
+               chip->irq_trig_raise |= mask;
+       else
+               chip->irq_trig_raise &= ~mask;
+
+       return max732x_gpio_direction_input(&chip->gpio_chip, off);
+}
+
+static struct irq_chip max732x_irq_chip = {
+       .name                   = "max732x",
+       .mask                   = max732x_irq_mask,
+       .unmask                 = max732x_irq_unmask,
+       .bus_lock               = max732x_irq_bus_lock,
+       .bus_sync_unlock        = max732x_irq_bus_sync_unlock,
+       .set_type               = max732x_irq_set_type,
+};
+
+static uint8_t max732x_irq_pending(struct max732x_chip *chip)
+{
+       uint8_t cur_stat;
+       uint8_t old_stat;
+       uint8_t trigger;
+       uint8_t pending;
+       uint16_t status;
+       int ret;
+
+       ret = max732x_readw(chip, &status);
+       if (ret)
+               return 0;
+
+       trigger = status >> 8;
+       trigger &= chip->irq_mask;
+
+       if (!trigger)
+               return 0;
+
+       cur_stat = status & 0xFF;
+       cur_stat &= chip->irq_mask;
+
+       old_stat = cur_stat ^ trigger;
+
+       pending = (old_stat & chip->irq_trig_fall) |
+                 (cur_stat & chip->irq_trig_raise);
+       pending &= trigger;
+
+       return pending;
+}
+
+static irqreturn_t max732x_irq_handler(int irq, void *devid)
+{
+       struct max732x_chip *chip = devid;
+       uint8_t pending;
+       uint8_t level;
+
+       pending = max732x_irq_pending(chip);
+
+       if (!pending)
+               return IRQ_HANDLED;
+
+       do {
+               level = __ffs(pending);
+               handle_nested_irq(level + chip->irq_base);
+
+               pending &= ~(1 << level);
+       } while (pending);
+
+       return IRQ_HANDLED;
+}
+
+static int max732x_irq_setup(struct max732x_chip *chip,
+                            const struct i2c_device_id *id)
+{
+       struct i2c_client *client = chip->client;
+       struct max732x_platform_data *pdata = client->dev.platform_data;
+       int has_irq = max732x_features[id->driver_data] >> 32;
+       int ret;
+
+       if (pdata->irq_base && has_irq != INT_NONE) {
+               int lvl;
+
+               chip->irq_base = pdata->irq_base;
+               chip->irq_features = has_irq;
+               mutex_init(&chip->irq_lock);
+
+               for (lvl = 0; lvl < chip->gpio_chip.ngpio; lvl++) {
+                       int irq = lvl + chip->irq_base;
+
+                       if (!(chip->dir_input & (1 << lvl)))
+                               continue;
+
+                       set_irq_chip_data(irq, chip);
+                       set_irq_chip_and_handler(irq, &max732x_irq_chip,
+                                                handle_edge_irq);
+                       set_irq_nested_thread(irq, 1);
+#ifdef CONFIG_ARM
+                       set_irq_flags(irq, IRQF_VALID);
+#else
+                       set_irq_noprobe(irq);
+#endif
+               }
+
+               ret = request_threaded_irq(client->irq,
+                                          NULL,
+                                          max732x_irq_handler,
+                                          IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                          dev_name(&client->dev), chip);
+               if (ret) {
+                       dev_err(&client->dev, "failed to request irq %d\n",
+                               client->irq);
+                       goto out_failed;
+               }
+
+               chip->gpio_chip.to_irq = max732x_gpio_to_irq;
+       }
+
+       return 0;
+
+out_failed:
+       chip->irq_base = 0;
+       return ret;
+}
+
+static void max732x_irq_teardown(struct max732x_chip *chip)
+{
+       if (chip->irq_base)
+               free_irq(chip->client->irq, chip);
+}
+#else /* CONFIG_GPIO_MAX732X_IRQ */
+static int max732x_irq_setup(struct max732x_chip *chip,
+                            const struct i2c_device_id *id)
+{
+       struct i2c_client *client = chip->client;
+       struct max732x_platform_data *pdata = client->dev.platform_data;
+       int has_irq = max732x_features[id->driver_data] >> 32;
+
+       if (pdata->irq_base && has_irq != INT_NONE)
+               dev_warn(&client->dev, "interrupt support not compiled in\n");
+
+       return 0;
+}
+
+static void max732x_irq_teardown(struct max732x_chip *chip)
+{
+}
+#endif
+
 static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
                                        const struct i2c_device_id *id,
                                        unsigned gpio_start)
 {
        struct gpio_chip *gc = &chip->gpio_chip;
-       uint32_t id_data = id->driver_data;
+       uint32_t id_data = (uint32_t)max732x_features[id->driver_data];
        int i, port = 0;
 
        for (i = 0; i < 16; i++, id_data >>= 2) {
@@ -285,14 +602,14 @@ static int __devinit max732x_probe(struct i2c_client *client,
        switch (client->addr & 0x70) {
        case 0x60:
                chip->client_group_a = client;
-               if (nr_port > 7) {
+               if (nr_port > 8) {
                        c = i2c_new_dummy(client->adapter, addr_b);
                        chip->client_group_b = chip->client_dummy = c;
                }
                break;
        case 0x50:
                chip->client_group_b = client;
-               if (nr_port > 7) {
+               if (nr_port > 8) {
                        c = i2c_new_dummy(client->adapter, addr_a);
                        chip->client_group_a = chip->client_dummy = c;
                }
@@ -306,9 +623,13 @@ static int __devinit max732x_probe(struct i2c_client *client,
 
        mutex_init(&chip->lock);
 
-       max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
-       if (nr_port > 7)
-               max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+       max732x_readb(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+       if (nr_port > 8)
+               max732x_readb(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+       ret = max732x_irq_setup(chip, id);
+       if (ret)
+               goto out_failed;
 
        ret = gpiochip_add(&chip->gpio_chip);
        if (ret)
@@ -325,6 +646,7 @@ static int __devinit max732x_probe(struct i2c_client *client,
        return 0;
 
 out_failed:
+       max732x_irq_teardown(chip);
        kfree(chip);
        return ret;
 }
@@ -352,6 +674,8 @@ static int __devexit max732x_remove(struct i2c_client *client)
                return ret;
        }
 
+       max732x_irq_teardown(chip);
+
        /* unregister any dummy i2c_client */
        if (chip->client_dummy)
                i2c_unregister_device(chip->client_dummy);
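
A sketch of the board-side wiring this new interrupt support expects (the GPIO base, IRQ numbers, and I2C address here are hypothetical; a non-zero irq_base in the platform data is what arms max732x_irq_setup()):

	#include <linux/i2c.h>
	#include <linux/i2c/max732x.h>

	static struct max732x_platform_data max7325_pdata = {
		.gpio_base = 224,
		.irq_base  = 352,	/* non-zero: enable expander IRQs */
	};

	static struct i2c_board_info max7325_info __initdata = {
		I2C_BOARD_INFO("max7325", 0x6d),	/* a Group A address */
		.platform_data = &max7325_pdata,
		.irq = 17,	/* line wired to the expander's INT# */
	};
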
index f156ab3..a2b12aa 100644 (file)
@@ -73,7 +73,7 @@ struct pca953x_chip {
        struct i2c_client *client;
        struct pca953x_platform_data *dyn_pdata;
        struct gpio_chip gpio_chip;
-       char **names;
+       const char *const *names;
 };
 
 static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val)
index 105701a..ee568c8 100644 (file)
@@ -164,7 +164,7 @@ static int pl061_irq_type(unsigned irq, unsigned trigger)
        unsigned long flags;
        u8 gpiois, gpioibe, gpioiev;
 
-       if (offset < 0 || offset > PL061_GPIO_NR)
+       if (offset < 0 || offset >= PL061_GPIO_NR)
                return -EINVAL;
 
        spin_lock_irqsave(&chip->irq_lock, flags);
index f569ae8..c198186 100644 (file)
@@ -147,7 +147,10 @@ drm_edid_block_valid(u8 *raw_edid)
                csum += raw_edid[i];
        if (csum) {
                DRM_ERROR("EDID checksum is invalid, remainder is %d\n", csum);
-               goto bad;
+
+               /* allow CEA to slide through, switches mangle this */
+               if (raw_edid[0] != 0x02)
+                       goto bad;
        }
 
        /* per-block-type checks */
index 7e663a7..266b0ff 100644 (file)
@@ -241,7 +241,8 @@ nouveau_connector_detect(struct drm_connector *connector)
        if (nv_encoder && nv_connector->native_mode) {
                unsigned status = connector_status_connected;
 
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI_BUTTON) || \
+       (defined(CONFIG_ACPI_BUTTON_MODULE) && defined(MODULE))
                if (!nouveau_ignorelid && !acpi_lid_open())
                        status = connector_status_unknown;
 #endif
index 0616c96..704a25d 100644 (file)
@@ -253,7 +253,11 @@ nv40_graph_init(struct drm_device *dev)
 
        if (!dev_priv->engine.graph.ctxprog) {
                struct nouveau_grctx ctx = {};
-               uint32_t cp[256];
+               uint32_t *cp;
+
+               cp = kmalloc(sizeof(*cp) * 256, GFP_KERNEL);
+               if (!cp)
+                       return -ENOMEM;
 
                ctx.dev = dev;
                ctx.mode = NOUVEAU_GRCTX_PROG;
@@ -265,6 +269,8 @@ nv40_graph_init(struct drm_device *dev)
                nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
                for (i = 0; i < ctx.ctxprog_len; i++)
                        nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
+
+               kfree(cp);
        }
 
        /* No context present currently */
index 03dd6c4..f3f2827 100644 (file)
@@ -707,6 +707,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
                break;
        case ATOM_DCPLL:
        case ATOM_PPLL_INVALID:
+       default:
                pll = &rdev->clock.dcpll;
                break;
        }
index 66a37fb..669feb6 100644 (file)
@@ -576,6 +576,7 @@ typedef int (*radeon_packet3_check_t)(struct radeon_cs_parser *p,
  */
 int radeon_agp_init(struct radeon_device *rdev);
 void radeon_agp_resume(struct radeon_device *rdev);
+void radeon_agp_suspend(struct radeon_device *rdev);
 void radeon_agp_fini(struct radeon_device *rdev);
 
 
index 28e473f..f40dfb7 100644 (file)
@@ -270,3 +270,8 @@ void radeon_agp_fini(struct radeon_device *rdev)
        }
 #endif
 }
+
+void radeon_agp_suspend(struct radeon_device *rdev)
+{
+       radeon_agp_fini(rdev);
+}
index 6e733fd..24ea683 100644 (file)
@@ -680,10 +680,18 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
        uint8_t dac;
        union atom_supported_devices *supported_devices;
        int i, j, max_device;
-       struct bios_connector bios_connectors[ATOM_MAX_SUPPORTED_DEVICE];
+       struct bios_connector *bios_connectors;
+       size_t bc_size = sizeof(*bios_connectors) * ATOM_MAX_SUPPORTED_DEVICE;
 
-       if (!atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+       bios_connectors = kzalloc(bc_size, GFP_KERNEL);
+       if (!bios_connectors)
+               return false;
+
+       if (!atom_parse_data_header(ctx, index, &size, &frev, &crev,
+                                   &data_offset)) {
+               kfree(bios_connectors);
                return false;
+       }
 
        supported_devices =
            (union atom_supported_devices *)(ctx->bios + data_offset);
@@ -851,6 +859,7 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
 
        radeon_link_encoder_connector(dev);
 
+       kfree(bios_connectors);
        return true;
 }
 
index a20b612..fdc3fdf 100644 (file)
@@ -754,6 +754,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
        /* evict remaining vram memory */
        radeon_bo_evict_vram(rdev);
 
+       radeon_agp_suspend(rdev);
+
        pci_save_state(dev->pdev);
        if (state.event == PM_EVENT_SUSPEND) {
                /* Shut down the device */
index 56f314f..c940267 100644 (file)
@@ -811,7 +811,7 @@ static const char *relatives[REL_MAX + 1] = {
        [REL_WHEEL] = "Wheel",          [REL_MISC] = "Misc",
 };
 
-static const char *absolutes[ABS_MAX + 1] = {
+static const char *absolutes[ABS_CNT] = {
        [ABS_X] = "X",                  [ABS_Y] = "Y",
        [ABS_Z] = "Z",                  [ABS_RX] = "Rx",
        [ABS_RY] = "Ry",                [ABS_RZ] = "Rz",
index b9e517d..3feaa26 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/zorro.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 
 #include <asm/setup.h>
 #include <asm/amigahw.h>
 
 
     /*
-     *  Bases of the IDE interfaces
-     */
-
-#define GAYLE_BASE_4000        0xdd2020        /* A4000/A4000T */
-#define GAYLE_BASE_1200        0xda0000        /* A1200/A600 and E-Matrix 530 */
-
-#define GAYLE_IDEREG_SIZE      0x2000
-
-    /*
      *  Offsets from one of the above bases
      */
 
@@ -68,20 +60,20 @@ MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
 
 static int gayle_test_irq(ide_hwif_t *hwif)
 {
-    unsigned char ch;
+       unsigned char ch;
 
-    ch = z_readb(hwif->io_ports.irq_addr);
-    if (!(ch & GAYLE_IRQ_IDE))
-       return 0;
-    return 1;
+       ch = z_readb(hwif->io_ports.irq_addr);
+       if (!(ch & GAYLE_IRQ_IDE))
+               return 0;
+       return 1;
 }
 
 static void gayle_a1200_clear_irq(ide_drive_t *drive)
 {
-    ide_hwif_t *hwif = drive->hwif;
+       ide_hwif_t *hwif = drive->hwif;
 
-    (void)z_readb(hwif->io_ports.status_addr);
-    z_writeb(0x7c, hwif->io_ports.irq_addr);
+       (void)z_readb(hwif->io_ports.status_addr);
+       z_writeb(0x7c, hwif->io_ports.irq_addr);
 }
 
 static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
@@ -122,64 +114,89 @@ static const struct ide_port_info gayle_port_info = {
      *  Probe for a Gayle IDE interface (and optionally for an IDE doubler)
      */
 
-static int __init gayle_init(void)
+static int __init amiga_gayle_ide_probe(struct platform_device *pdev)
 {
-    unsigned long phys_base, res_start, res_n;
-    unsigned long base, ctrlport, irqport;
-    int a4000, i, rc;
-    struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
-    struct ide_port_info d = gayle_port_info;
-
-    if (!MACH_IS_AMIGA)
-       return -ENODEV;
-
-    if ((a4000 = AMIGAHW_PRESENT(A4000_IDE)) || AMIGAHW_PRESENT(A1200_IDE))
-       goto found;
-
-#ifdef CONFIG_ZORRO
-    if (zorro_find_device(ZORRO_PROD_MTEC_VIPER_MK_V_E_MATRIX_530_SCSI_IDE,
-                         NULL))
-       goto found;
-#endif
-    return -ENODEV;
-
-found:
-       printk(KERN_INFO "ide: Gayle IDE controller (A%d style%s)\n",
-                        a4000 ? 4000 : 1200,
-                        ide_doubler ? ", IDE doubler" : "");
-
-       if (a4000) {
-           phys_base = GAYLE_BASE_4000;
-           irqport = (unsigned long)ZTWO_VADDR(GAYLE_IRQ_4000);
-           d.port_ops = &gayle_a4000_port_ops;
-       } else {
-           phys_base = GAYLE_BASE_1200;
-           irqport = (unsigned long)ZTWO_VADDR(GAYLE_IRQ_1200);
-           d.port_ops = &gayle_a1200_port_ops;
+       struct resource *res;
+       struct gayle_ide_platform_data *pdata;
+       unsigned long base, ctrlport, irqport;
+       unsigned int i;
+       int error;
+       struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
+       struct ide_port_info d = gayle_port_info;
+       struct ide_host *host;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       if (!request_mem_region(res->start, resource_size(res), "IDE"))
+               return -EBUSY;
+
+       pdata = pdev->dev.platform_data;
+       pr_info("ide: Gayle IDE controller (A%u style%s)\n",
+               pdata->explicit_ack ? 1200 : 4000,
+               ide_doubler ? ", IDE doubler" : "");
+
+       base = (unsigned long)ZTWO_VADDR(pdata->base);
+       ctrlport = 0;
+       irqport = (unsigned long)ZTWO_VADDR(pdata->irqport);
+       if (pdata->explicit_ack)
+               d.port_ops = &gayle_a1200_port_ops;
+       else
+               d.port_ops = &gayle_a4000_port_ops;
+
+       for (i = 0; i < GAYLE_NUM_PROBE_HWIFS; i++, base += GAYLE_NEXT_PORT) {
+               if (GAYLE_HAS_CONTROL_REG)
+                       ctrlport = base + GAYLE_CONTROL;
+
+               gayle_setup_ports(&hw[i], base, ctrlport, irqport);
+               hws[i] = &hw[i];
        }
 
-       res_start = ((unsigned long)phys_base) & ~(GAYLE_NEXT_PORT-1);
-       res_n = GAYLE_IDEREG_SIZE;
+       error = ide_host_add(&d, hws, i, &host);
+       if (error)
+               goto out;
 
-       if (!request_mem_region(res_start, res_n, "IDE"))
-               return -EBUSY;
+       platform_set_drvdata(pdev, host);
+       return 0;
 
-    for (i = 0; i < GAYLE_NUM_PROBE_HWIFS; i++) {
-       base = (unsigned long)ZTWO_VADDR(phys_base + i * GAYLE_NEXT_PORT);
-       ctrlport = GAYLE_HAS_CONTROL_REG ? (base + GAYLE_CONTROL) : 0;
+out:
+       release_mem_region(res->start, resource_size(res));
+       return error;
+}
+
+static int __exit amiga_gayle_ide_remove(struct platform_device *pdev)
+{
+       struct ide_host *host = platform_get_drvdata(pdev);
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       ide_host_remove(host);
+       release_mem_region(res->start, resource_size(res));
+       return 0;
+}
 
-       gayle_setup_ports(&hw[i], base, ctrlport, irqport);
+static struct platform_driver amiga_gayle_ide_driver = {
+       .remove = __exit_p(amiga_gayle_ide_remove),
+       .driver   = {
+               .name   = "amiga-gayle-ide",
+               .owner  = THIS_MODULE,
+       },
+};
 
-       hws[i] = &hw[i];
-    }
+static int __init amiga_gayle_ide_init(void)
+{
+       return platform_driver_probe(&amiga_gayle_ide_driver,
+                                    amiga_gayle_ide_probe);
+}
 
-    rc = ide_host_add(&d, hws, i, NULL);
-    if (rc)
-       release_mem_region(res_start, res_n);
+module_init(amiga_gayle_ide_init);
 
-    return rc;
+static void __exit amiga_gayle_ide_exit(void)
+{
+       platform_driver_unregister(&amiga_gayle_ide_driver);
 }
 
-module_init(gayle_init);
+module_exit(amiga_gayle_ide_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:amiga-gayle-ide");
index 9fd4a0d..adaefab 100644 (file)
@@ -1824,7 +1824,7 @@ static int dv1394_open(struct inode *inode, struct file *file)
               "and will not be available in the new firewire driver stack. "
               "Try libraw1394 based programs instead.\n", current->comm);
 
-       return 0;
+       return nonseekable_open(inode, file);
 }
 
 
@@ -2153,17 +2153,18 @@ static struct cdev dv1394_cdev;
 static const struct file_operations dv1394_fops=
 {
        .owner =        THIS_MODULE,
-       .poll =         dv1394_poll,
+       .poll =         dv1394_poll,
        .unlocked_ioctl = dv1394_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = dv1394_compat_ioctl,
 #endif
        .mmap =         dv1394_mmap,
        .open =         dv1394_open,
-       .write =        dv1394_write,
-       .read =         dv1394_read,
+       .write =        dv1394_write,
+       .read =         dv1394_read,
        .release =      dv1394_release,
-       .fasync =       dv1394_fasync,
+       .fasync =       dv1394_fasync,
+       .llseek =       no_llseek,
 };
 
 
index 8aa56ac..b563d5e 100644 (file)
@@ -2834,7 +2834,7 @@ static int raw1394_open(struct inode *inode, struct file *file)
 
        file->private_data = fi;
 
-       return 0;
+       return nonseekable_open(inode, file);
 }
 
 static int raw1394_release(struct inode *inode, struct file *file)
@@ -3035,6 +3035,7 @@ static const struct file_operations raw1394_fops = {
        .poll = raw1394_poll,
        .open = raw1394_open,
        .release = raw1394_release,
+       .llseek = no_llseek,
 };
 
 static int __init init_raw1394(void)
index 949064a..a42bd68 100644 (file)
@@ -1239,7 +1239,7 @@ static int video1394_open(struct inode *inode, struct file *file)
        ctx->current_ctx = NULL;
        file->private_data = ctx;
 
-       return 0;
+       return nonseekable_open(inode, file);
 }
 
 static int video1394_release(struct inode *inode, struct file *file)
@@ -1287,7 +1287,8 @@ static const struct file_operations video1394_fops=
        .poll =         video1394_poll,
        .mmap =         video1394_mmap,
        .open =         video1394_open,
-       .release =      video1394_release
+       .release =      video1394_release,
+       .llseek =       no_llseek,
 };
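
The same idiom in generic form, for char devices with stream-like semantics (all names here are placeholders): nonseekable_open() marks the file non-seekable at open time, and .llseek = no_llseek makes lseek() return -ESPIPE instead of silently updating f_pos:

	#include <linux/fs.h>
	#include <linux/module.h>

	static int foo_open(struct inode *inode, struct file *file)
	{
		return nonseekable_open(inode, file);
	}

	static const struct file_operations foo_fops = {
		.owner  = THIS_MODULE,
		.open   = foo_open,
		.llseek = no_llseek,
	};
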
 
 /*** HOTPLUG STUFF **********************************************************/
index 07cae55..e571e60 100644 (file)
@@ -847,7 +847,7 @@ static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
                ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
                if (!create_comp_task(pool, cpu)) {
                        ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
                }
                break;
        case CPU_UP_CANCELED:
index 423e0e6..34157bb 100644 (file)
@@ -47,15 +47,15 @@ struct joydev {
        struct mutex mutex;
        struct device dev;
 
-       struct js_corr corr[ABS_MAX + 1];
+       struct js_corr corr[ABS_CNT];
        struct JS_DATA_SAVE_TYPE glue;
        int nabs;
        int nkey;
        __u16 keymap[KEY_MAX - BTN_MISC + 1];
        __u16 keypam[KEY_MAX - BTN_MISC + 1];
-       __u8 absmap[ABS_MAX + 1];
-       __u8 abspam[ABS_MAX + 1];
-       __s16 abs[ABS_MAX + 1];
+       __u8 absmap[ABS_CNT];
+       __u8 abspam[ABS_CNT];
+       __s16 abs[ABS_CNT];
 };
 
 struct joydev_client {
@@ -826,7 +826,7 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev,
        joydev->handle.handler = handler;
        joydev->handle.private = joydev;
 
-       for (i = 0; i < ABS_MAX + 1; i++)
+       for (i = 0; i < ABS_CNT; i++)
                if (test_bit(i, dev->absbit)) {
                        joydev->absmap[i] = joydev->nabs;
                        joydev->abspam[joydev->nabs] = i;
index 35149ec..79172af 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/keyboard.h>
+#include <linux/platform_device.h>
 
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
@@ -154,10 +155,9 @@ static const char *amikbd_messages[8] = {
        [7] = KERN_WARNING "amikbd: keyboard interrupt\n"
 };
 
-static struct input_dev *amikbd_dev;
-
-static irqreturn_t amikbd_interrupt(int irq, void *dummy)
+static irqreturn_t amikbd_interrupt(int irq, void *data)
 {
+       struct input_dev *dev = data;
        unsigned char scancode, down;
 
        scancode = ~ciaa.sdr;           /* get and invert scancode (keyboard is active low) */
@@ -170,47 +170,42 @@ static irqreturn_t amikbd_interrupt(int irq, void *dummy)
 
        if (scancode < 0x78) {          /* scancodes < 0x78 are keys */
                if (scancode == 98) {   /* CapsLock is a toggle switch key on Amiga */
-                       input_report_key(amikbd_dev, scancode, 1);
-                       input_report_key(amikbd_dev, scancode, 0);
+                       input_report_key(dev, scancode, 1);
+                       input_report_key(dev, scancode, 0);
                } else {
-                       input_report_key(amikbd_dev, scancode, down);
+                       input_report_key(dev, scancode, down);
                }
 
-               input_sync(amikbd_dev);
+               input_sync(dev);
        } else                          /* scancodes >= 0x78 are error codes */
                printk(amikbd_messages[scancode - 0x78]);
 
        return IRQ_HANDLED;
 }
 
-static int __init amikbd_init(void)
+static int __init amikbd_probe(struct platform_device *pdev)
 {
+       struct input_dev *dev;
        int i, j, err;
 
-       if (!AMIGAHW_PRESENT(AMI_KEYBOARD))
-               return -ENODEV;
-
-       if (!request_mem_region(CIAA_PHYSADDR-1+0xb00, 0x100, "amikeyb"))
-               return -EBUSY;
-
-       amikbd_dev = input_allocate_device();
-       if (!amikbd_dev) {
-               printk(KERN_ERR "amikbd: not enough memory for input device\n");
-               err = -ENOMEM;
-               goto fail1;
+       dev = input_allocate_device();
+       if (!dev) {
+               dev_err(&pdev->dev, "Not enough memory for input device\n");
+               return -ENOMEM;
        }
 
-       amikbd_dev->name = "Amiga Keyboard";
-       amikbd_dev->phys = "amikbd/input0";
-       amikbd_dev->id.bustype = BUS_AMIGA;
-       amikbd_dev->id.vendor = 0x0001;
-       amikbd_dev->id.product = 0x0001;
-       amikbd_dev->id.version = 0x0100;
+       dev->name = pdev->name;
+       dev->phys = "amikbd/input0";
+       dev->id.bustype = BUS_AMIGA;
+       dev->id.vendor = 0x0001;
+       dev->id.product = 0x0001;
+       dev->id.version = 0x0100;
+       dev->dev.parent = &pdev->dev;
 
-       amikbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+       dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
        for (i = 0; i < 0x78; i++)
-               set_bit(i, amikbd_dev->keybit);
+               set_bit(i, dev->keybit);
 
        for (i = 0; i < MAX_NR_KEYMAPS; i++) {
                static u_short temp_map[NR_KEYS] __initdata;
@@ -229,30 +224,54 @@ static int __init amikbd_init(void)
                memcpy(key_maps[i], temp_map, sizeof(temp_map));
        }
        ciaa.cra &= ~0x41;       /* serial data in, turn off TA */
-       if (request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
-                       amikbd_interrupt)) {
-               err = -EBUSY;
+       err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
+                         dev);
+       if (err)
                goto fail2;
-       }
 
-       err = input_register_device(amikbd_dev);
+       err = input_register_device(dev);
        if (err)
                goto fail3;
 
+       platform_set_drvdata(pdev, dev);
+
        return 0;
 
- fail3:        free_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt);
- fail2:        input_free_device(amikbd_dev);
- fail1:        release_mem_region(CIAA_PHYSADDR - 1 + 0xb00, 0x100);
+ fail3:        free_irq(IRQ_AMIGA_CIAA_SP, dev);
+ fail2:        input_free_device(dev);
        return err;
 }
 
-static void __exit amikbd_exit(void)
+static int __exit amikbd_remove(struct platform_device *pdev)
+{
+       struct input_dev *dev = platform_get_drvdata(pdev);
+
+       platform_set_drvdata(pdev, NULL);
+       free_irq(IRQ_AMIGA_CIAA_SP, dev);
+       input_unregister_device(dev);
+       return 0;
+}
+
+static struct platform_driver amikbd_driver = {
+       .remove = __exit_p(amikbd_remove),
+       .driver   = {
+               .name   = "amiga-keyboard",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init amikbd_init(void)
 {
-       free_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt);
-       input_unregister_device(amikbd_dev);
-       release_mem_region(CIAA_PHYSADDR - 1 + 0xb00, 0x100);
+       return platform_driver_probe(&amikbd_driver, amikbd_probe);
 }
 
 module_init(amikbd_init);
+
+static void __exit amikbd_exit(void)
+{
+       platform_driver_unregister(&amikbd_driver);
+}
+
 module_exit(amikbd_exit);
+
+MODULE_ALIAS("platform:amiga-keyboard");
index 48cdabe..c44b9ea 100644 (file)
@@ -80,6 +80,16 @@ config INPUT_M68K_BEEP
        tristate "M68k Beeper support"
        depends on M68K
 
+config INPUT_MAX8925_ONKEY
+       tristate "MAX8925 ONKEY support"
+       depends on MFD_MAX8925
+       help
+         Support the ONKEY of MAX8925 PMICs as an input device
+         reporting power button status.
+
+         To compile this driver as a module, choose M here: the module
+         will be called max8925_onkey.
+
 config INPUT_APANEL
        tristate "Fujitsu Lifebook Application Panel buttons"
        depends on X86 && I2C && LEDS_CLASS
index f9f5770..71fe57d 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_HP_SDC_RTC)              += hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)      += ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)     += keyspan_remote.o
 obj-$(CONFIG_INPUT_M68K_BEEP)          += m68kspkr.o
+obj-$(CONFIG_INPUT_MAX8925_ONKEY)      += max8925_onkey.o
 obj-$(CONFIG_INPUT_PCAP)               += pcap_keys.o
 obj-$(CONFIG_INPUT_PCF50633_PMU)       += pcf50633-input.o
 obj-$(CONFIG_INPUT_PCF8574)            += pcf8574_keypad.o
diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c
new file mode 100644 (file)
index 0000000..80af446
--- /dev/null
@@ -0,0 +1,148 @@
+/**
+ * max8925_onkey.c - MAX8925 ONKEY driver
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ *      Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/max8925.h>
+#include <linux/slab.h>
+
+#define HARDRESET_EN           (1 << 7)
+#define PWREN_EN               (1 << 7)
+
+struct max8925_onkey_info {
+       struct input_dev        *idev;
+       struct i2c_client       *i2c;
+       int                     irq;
+};
+
+/*
+ * MAX8925 gives us an interrupt when ONKEY is held for 3 seconds.
+ * max8925_set_bits() operates on the I2C bus and may sleep, so the
+ * work is done in a threaded IRQ handler.
+ */
+static irqreturn_t max8925_onkey_handler(int irq, void *data)
+{
+       struct max8925_onkey_info *info = data;
+
+       input_report_key(info->idev, KEY_POWER, 1);
+       input_sync(info->idev);
+
+       /* Enable hardreset to halt if the system isn't shut down in time */
+       max8925_set_bits(info->i2c, MAX8925_SYSENSEL,
+                        HARDRESET_EN, HARDRESET_EN);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit max8925_onkey_probe(struct platform_device *pdev)
+{
+       struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
+       struct max8925_onkey_info *info;
+       int error;
+
+       info = kzalloc(sizeof(struct max8925_onkey_info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       info->i2c = chip->i2c;
+       info->irq = chip->irq_base + MAX8925_IRQ_GPM_SW_3SEC;
+
+       info->idev = input_allocate_device();
+       if (!info->idev) {
+               dev_err(chip->dev, "Failed to allocate input dev\n");
+               error = -ENOMEM;
+               goto out_input;
+       }
+
+       info->idev->name = "max8925_on";
+       info->idev->phys = "max8925_on/input0";
+       info->idev->id.bustype = BUS_I2C;
+       info->idev->dev.parent = &pdev->dev;
+       info->idev->evbit[0] = BIT_MASK(EV_KEY);
+       info->idev->keybit[BIT_WORD(KEY_POWER)] = BIT_MASK(KEY_POWER);
+
+       error = request_threaded_irq(info->irq, NULL, max8925_onkey_handler,
+                                    IRQF_ONESHOT, "onkey", info);
+       if (error < 0) {
+               dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n",
+                       info->irq, error);
+               goto out_irq;
+       }
+
+       error = input_register_device(info->idev);
+       if (error) {
+               dev_err(chip->dev, "Can't register input device: %d\n", error);
+               goto out;
+       }
+
+       platform_set_drvdata(pdev, info);
+
+       return 0;
+
+out:
+       free_irq(info->irq, info);
+out_irq:
+       input_free_device(info->idev);
+out_input:
+       kfree(info);
+       return error;
+}
+
+static int __devexit max8925_onkey_remove(struct platform_device *pdev)
+{
+       struct max8925_onkey_info *info = platform_get_drvdata(pdev);
+
+       free_irq(info->irq, info);
+       input_unregister_device(info->idev);
+       kfree(info);
+
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+static struct platform_driver max8925_onkey_driver = {
+       .driver         = {
+               .name   = "max8925-onkey",
+               .owner  = THIS_MODULE,
+       },
+       .probe          = max8925_onkey_probe,
+       .remove         = __devexit_p(max8925_onkey_remove),
+};
+
+static int __init max8925_onkey_init(void)
+{
+       return platform_driver_register(&max8925_onkey_driver);
+}
+module_init(max8925_onkey_init);
+
+static void __exit max8925_onkey_exit(void)
+{
+       platform_driver_unregister(&max8925_onkey_driver);
+}
+module_exit(max8925_onkey_exit);
+
+MODULE_DESCRIPTION("Maxim MAX8925 ONKEY driver");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
index fee9eac..4f9b2af 100644 (file)
@@ -90,8 +90,8 @@ static void vibra_disable(struct vibra_info *info)
        twl_i2c_write_u8(TWL4030_MODULE_AUDIO_VOICE,
                         (reg & ~TWL4030_VIBRA_EN), TWL4030_REG_VIBRA_CTL);
 
-       twl4030_codec_disable_resource(TWL4030_CODEC_RES_POWER);
        twl4030_codec_disable_resource(TWL4030_CODEC_RES_APLL);
+       twl4030_codec_disable_resource(TWL4030_CODEC_RES_POWER);
 
        info->enabled = false;
 }
index 1477466..b71eb55 100644 (file)
@@ -300,7 +300,7 @@ static int uinput_validate_absbits(struct input_dev *dev)
        unsigned int cnt;
        int retval = 0;
 
-       for (cnt = 0; cnt < ABS_MAX + 1; cnt++) {
+       for (cnt = 0; cnt < ABS_CNT; cnt++) {
                if (!test_bit(cnt, dev->absbit))
                        continue;
 
@@ -387,7 +387,7 @@ static int uinput_setup_device(struct uinput_device *udev, const char __user *bu
        dev->id.product = user_dev->id.product;
        dev->id.version = user_dev->id.version;
 
-       size = sizeof(int) * (ABS_MAX + 1);
+       size = sizeof(int) * ABS_CNT;
        memcpy(dev->absmax, user_dev->absmax, size);
        memcpy(dev->absmin, user_dev->absmin, size);
        memcpy(dev->absfuzz, user_dev->absfuzz, size);
index a185ac7..ff5f61a 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
 
 #include <asm/irq.h>
 #include <asm/setup.h>
@@ -34,10 +35,10 @@ MODULE_DESCRIPTION("Amiga mouse driver");
 MODULE_LICENSE("GPL");
 
 static int amimouse_lastx, amimouse_lasty;
-static struct input_dev *amimouse_dev;
 
-static irqreturn_t amimouse_interrupt(int irq, void *dummy)
+static irqreturn_t amimouse_interrupt(int irq, void *data)
 {
+       struct input_dev *dev = data;
        unsigned short joy0dat, potgor;
        int nx, ny, dx, dy;
 
@@ -59,14 +60,14 @@ static irqreturn_t amimouse_interrupt(int irq, void *dummy)
 
        potgor = amiga_custom.potgor;
 
-       input_report_rel(amimouse_dev, REL_X, dx);
-       input_report_rel(amimouse_dev, REL_Y, dy);
+       input_report_rel(dev, REL_X, dx);
+       input_report_rel(dev, REL_Y, dy);
 
-       input_report_key(amimouse_dev, BTN_LEFT,   ciaa.pra & 0x40);
-       input_report_key(amimouse_dev, BTN_MIDDLE, potgor & 0x0100);
-       input_report_key(amimouse_dev, BTN_RIGHT,  potgor & 0x0400);
+       input_report_key(dev, BTN_LEFT,   ciaa.pra & 0x40);
+       input_report_key(dev, BTN_MIDDLE, potgor & 0x0100);
+       input_report_key(dev, BTN_RIGHT,  potgor & 0x0400);
 
-       input_sync(amimouse_dev);
+       input_sync(dev);
 
        return IRQ_HANDLED;
 }
@@ -74,63 +75,90 @@ static irqreturn_t amimouse_interrupt(int irq, void *dummy)
 static int amimouse_open(struct input_dev *dev)
 {
        unsigned short joy0dat;
+       int error;
 
        joy0dat = amiga_custom.joy0dat;
 
        amimouse_lastx = joy0dat & 0xff;
        amimouse_lasty = joy0dat >> 8;
 
-       if (request_irq(IRQ_AMIGA_VERTB, amimouse_interrupt, 0, "amimouse", amimouse_interrupt)) {
-                printk(KERN_ERR "amimouse.c: Can't allocate irq %d\n", IRQ_AMIGA_VERTB);
-                return -EBUSY;
-        }
+       error = request_irq(IRQ_AMIGA_VERTB, amimouse_interrupt, 0, "amimouse",
+                           dev);
+       if (error)
+               dev_err(&dev->dev, "Can't allocate irq %d\n", IRQ_AMIGA_VERTB);
 
-        return 0;
+       return error;
 }
 
 static void amimouse_close(struct input_dev *dev)
 {
-       free_irq(IRQ_AMIGA_VERTB, amimouse_interrupt);
+       free_irq(IRQ_AMIGA_VERTB, dev);
 }
 
-static int __init amimouse_init(void)
+static int __init amimouse_probe(struct platform_device *pdev)
 {
        int err;
+       struct input_dev *dev;
 
-       if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(AMI_MOUSE))
-               return -ENODEV;
-
-       amimouse_dev = input_allocate_device();
-       if (!amimouse_dev)
+       dev = input_allocate_device();
+       if (!dev)
                return -ENOMEM;
 
-       amimouse_dev->name = "Amiga mouse";
-       amimouse_dev->phys = "amimouse/input0";
-       amimouse_dev->id.bustype = BUS_AMIGA;
-       amimouse_dev->id.vendor = 0x0001;
-       amimouse_dev->id.product = 0x0002;
-       amimouse_dev->id.version = 0x0100;
+       dev->name = pdev->name;
+       dev->phys = "amimouse/input0";
+       dev->id.bustype = BUS_AMIGA;
+       dev->id.vendor = 0x0001;
+       dev->id.product = 0x0002;
+       dev->id.version = 0x0100;
 
-       amimouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
-       amimouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
-       amimouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+       dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
                BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
-       amimouse_dev->open = amimouse_open;
-       amimouse_dev->close = amimouse_close;
+       dev->open = amimouse_open;
+       dev->close = amimouse_close;
+       dev->dev.parent = &pdev->dev;
 
-       err = input_register_device(amimouse_dev);
+       err = input_register_device(dev);
        if (err) {
-               input_free_device(amimouse_dev);
+               input_free_device(dev);
                return err;
        }
 
+       platform_set_drvdata(pdev, dev);
+
        return 0;
 }
 
-static void __exit amimouse_exit(void)
+static int __exit amimouse_remove(struct platform_device *pdev)
 {
-        input_unregister_device(amimouse_dev);
+       struct input_dev *dev = platform_get_drvdata(pdev);
+
+       platform_set_drvdata(pdev, NULL);
+       input_unregister_device(dev);
+       return 0;
+}
+
+static struct platform_driver amimouse_driver = {
+       .remove = __exit_p(amimouse_remove),
+       .driver   = {
+               .name   = "amiga-mouse",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init amimouse_init(void)
+{
+       return platform_driver_probe(&amimouse_driver, amimouse_probe);
 }
 
 module_init(amimouse_init);
+
+static void __exit amimouse_exit(void)
+{
+       platform_driver_unregister(&amimouse_driver);
+}
+
 module_exit(amimouse_exit);
+
+MODULE_ALIAS("platform:amiga-mouse");
index 532279c..634f6f6 100644 (file)
@@ -1163,8 +1163,8 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 
        ts->reg = regulator_get(&spi->dev, "vcc");
        if (IS_ERR(ts->reg)) {
-               dev_err(&spi->dev, "unable to get regulator: %ld\n",
-                       PTR_ERR(ts->reg));
+               err = PTR_ERR(ts->reg);
+               dev_err(&spi->dev, "unable to get regulator: %ld\n", err);
                goto err_free_gpio;
        }
 
index e0b7c83..ac5d0f9 100644 (file)
@@ -413,6 +413,8 @@ static struct dev_pm_ops s3c_ts_pmops = {
 #endif
 
 static struct platform_device_id s3cts_driver_ids[] = {
+       { "s3c2410-ts", 0 },
+       { "s3c2440-ts", 0 },
        { "s3c64xx-ts", FEAT_PEN_IRQ },
        { }
 };
index 29a8bbf..567d572 100644 (file)
@@ -857,6 +857,11 @@ static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt)
        if ((pkt[0] & 0xe0) != 0xe0)
                return 0;
 
+       if (be16_to_cpu(packet->data_len) > 0xff)
+               packet->data_len = cpu_to_be16(be16_to_cpu(packet->data_len) - 0x100);
+       if (be16_to_cpu(packet->x_len) > 0xff)
+               packet->x_len = cpu_to_be16(be16_to_cpu(packet->x_len) - 0x80);
+
        /* send ACK */
        ret = usb_submit_urb(priv->ack, GFP_ATOMIC);
 
@@ -1112,7 +1117,7 @@ static struct usbtouch_device_info usbtouch_dev_info[] = {
 
 #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO
        [DEVTYPE_NEXIO] = {
-               .rept_size      = 128,
+               .rept_size      = 1024,
                .irq_always     = true,
                .read_data      = nexio_read_data,
                .init           = nexio_init,
index c3243c9..81048b8 100644 (file)
@@ -98,8 +98,6 @@ mISDN_read(struct file *filep, char __user *buf, size_t count, loff_t *off)
        if (*debug & DEBUG_TIMER)
                printk(KERN_DEBUG "%s(%p, %p, %d, %p)\n", __func__,
                        filep, buf, (int)count, off);
-       if (*off != filep->f_pos)
-               return -ESPIPE;
 
        if (list_empty(&dev->expired) && (dev->work == 0)) {
                if (filep->f_flags & O_NONBLOCK)
index 9ea17d6..d2c0f94 100644 (file)
@@ -4645,7 +4645,7 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
                        kfree(percpu->scribble);
                        pr_err("%s: failed memory allocation for cpu%ld\n",
                               __func__, cpu);
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
                }
                break;
        case CPU_DEAD:
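
The point of switching these CPU notifiers (here and in ehca above) from NOTIFY_BAD to notifier_from_errno() is that the original errno survives for callers that unwrap it; a quick sanity sketch:

	#include <linux/kernel.h>
	#include <linux/notifier.h>

	static void demo_notifier_errno(void)
	{
		int ret = notifier_from_errno(-ENOMEM);

		/* the errno round-trips through the notifier encoding */
		WARN_ON(notifier_to_errno(ret) != -ENOMEM);
	}
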
index d33693c..c4b117f 100644 (file)
@@ -186,14 +186,9 @@ static int i2o_cfg_parms(unsigned long arg, unsigned int type)
        if (!dev)
                return -ENXIO;
 
-       ops = kmalloc(kcmd.oplen, GFP_KERNEL);
-       if (!ops)
-               return -ENOMEM;
-
-       if (copy_from_user(ops, kcmd.opbuf, kcmd.oplen)) {
-               kfree(ops);
-               return -EFAULT;
-       }
+       ops = memdup_user(kcmd.opbuf, kcmd.oplen);
+       if (IS_ERR(ops))
+               return PTR_ERR(ops);
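
memdup_user() folds the kmalloc()/copy_from_user()/kfree-on-error dance above into one call; the general idiom, as a sketch (buffer name and length are illustrative):

	#include <linux/err.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static int demo_memdup(const void __user *user_ptr, size_t len)
	{
		void *buf = memdup_user(user_ptr, len);

		if (IS_ERR(buf))
			return PTR_ERR(buf);
		/* ... use the kernel copy ... */
		kfree(buf);
		return 0;
	}
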
 
        /*
         * It's possible to have a _very_ large table
index 31a9911..5bfb2a2 100644 (file)
@@ -75,6 +75,9 @@ enum ctype {
        UNALIGNED_LOAD_STORE_WRITE,
        OVERWRITE_ALLOCATION,
        WRITE_AFTER_FREE,
+       SOFTLOCKUP,
+       HARDLOCKUP,
+       HUNG_TASK,
 };
 
 static char* cp_name[] = {
@@ -99,6 +102,9 @@ static char* cp_type[] = {
        "UNALIGNED_LOAD_STORE_WRITE",
        "OVERWRITE_ALLOCATION",
        "WRITE_AFTER_FREE",
+       "SOFTLOCKUP",
+       "HARDLOCKUP",
+       "HUNG_TASK",
 };
 
 static struct jprobe lkdtm;
@@ -320,6 +326,20 @@ static void lkdtm_do_action(enum ctype which)
                memset(data, 0x78, len);
                break;
        }
+       case SOFTLOCKUP:
+               preempt_disable();
+               for (;;)
+                       cpu_relax();
+               break;
+       case HARDLOCKUP:
+               local_irq_disable();
+               for (;;)
+                       cpu_relax();
+               break;
+       case HUNG_TASK:
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule();
+               break;
        case NONE:
        default:
                break;
index 3168ebd..569e94d 100644 (file)
@@ -1252,9 +1252,8 @@ EXPORT_SYMBOL(mmc_card_can_sleep);
 /**
  *     mmc_suspend_host - suspend a host
  *     @host: mmc host
- *     @state: suspend mode (PM_SUSPEND_xxx)
  */
-int mmc_suspend_host(struct mmc_host *host, pm_message_t state)
+int mmc_suspend_host(struct mmc_host *host)
 {
        int err = 0;
 
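
Host drivers drop the pm_message_t argument from the call accordingly; a sketch of an updated suspend hook (the driver name is a placeholder, mirroring the at91_mci change at the end of this section):

	#include <linux/mmc/host.h>
	#include <linux/platform_device.h>

	static int myhost_suspend(struct platform_device *pdev, pm_message_t state)
	{
		struct mmc_host *mmc = platform_get_drvdata(pdev);

		return mmc ? mmc_suspend_host(mmc) : 0;
	}
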
index 0d96080..63772e7 100644 (file)
@@ -79,8 +79,6 @@ int mmc_wait_for_app_cmd(struct mmc_host *host, struct mmc_card *card,
         * we cannot use the retries field in mmc_command.
         */
        for (i = 0;i <= retries;i++) {
-               memset(&mrq, 0, sizeof(struct mmc_request));
-
                err = mmc_app_cmd(host, card);
                if (err) {
                        /* no point in retrying; no APP commands allowed */
index ff27c8c..0f687cd 100644 (file)
@@ -406,6 +406,36 @@ void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret)
 EXPORT_SYMBOL_GPL(sdio_writeb);
 
 /**
+ *     sdio_writeb_readb - write and read a byte from SDIO function
+ *     @func: SDIO function to access
+ *     @write_byte: byte to write
+ *     @addr: address to write to
+ *     @err_ret: optional status value from transfer
+ *
+ *     Performs a RAW (Read after Write) operation per the SDIO spec: a
+ *     single byte is written to the given SDIO function's address space and
+ *     the response is read back from the same address, in a single request.
+ *     If there is a problem with the operation, 0xff is returned and
+ *     @err_ret will contain the error code.
+ */
+u8 sdio_writeb_readb(struct sdio_func *func, u8 write_byte,
+       unsigned int addr, int *err_ret)
+{
+       int ret;
+       u8 val;
+
+       ret = mmc_io_rw_direct(func->card, 1, func->num, addr,
+                       write_byte, &val);
+       if (err_ret)
+               *err_ret = ret;
+       if (ret)
+               val = 0xff;
+
+       return val;
+}
+EXPORT_SYMBOL_GPL(sdio_writeb_readb);
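A function driver would typically wrap the new helper in the usual claim/release pair. A hedged sketch with hypothetical names; the 0xff fallback mirrors the error behaviour documented above:

        #include <linux/mmc/sdio_func.h>

        /* Hypothetical: atomically write a register and capture the readback. */
        static u8 foo_update_reg(struct sdio_func *func, unsigned int reg, u8 val)
        {
                int err;
                u8 readback;

                sdio_claim_host(func);
                readback = sdio_writeb_readb(func, val, reg, &err);
                sdio_release_host(func);

                return err ? 0xff : readback;   /* helper returns 0xff on error anyway */
        }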
+
+/**
  *     sdio_memcpy_fromio - read a chunk of memory from a SDIO function
  *     @func: SDIO function to access
  *     @dst: buffer to store the data
index 2e13b94..e171e77 100644 (file)
@@ -136,6 +136,18 @@ config MMC_SDHCI_S3C
 
          If unsure, say N.
 
+config MMC_SDHCI_SPEAR
+       tristate "SDHCI support on ST SPEAr platform"
+       depends on MMC_SDHCI && PLAT_SPEAR
+       help
+         This selects the Secure Digital Host Controller Interface (SDHCI),
+         often referred to as the HSMMC block in some of the ST SPEAr range
+         of SoCs.
+
+         If you have a controller with this interface, say Y or M here.
+
+         If unsure, say N.
+
 config MMC_SDHCI_S3C_DMA
        bool "DMA support on S3C SDHCI"
        depends on MMC_SDHCI_S3C && EXPERIMENTAL
@@ -412,3 +424,11 @@ config SDH_BFIN_MISSING_CMD_PULLUP_WORKAROUND
        depends on SDH_BFIN
        help
          If you say yes here SD-Cards may work on the EZkit.
+
+config MMC_SH_MMCIF
+       tristate "SuperH Internal MMCIF support"
+       depends on MMC_BLOCK && (SUPERH || ARCH_SHMOBILE)
+       help
+         This selects the MMC Host Interface controller (MMCIF).
+
+         This driver supports the MMCIF found in sh7724/sh7757/sh7372.
index f480397..e30c2ee 100644 (file)
@@ -14,6 +14,7 @@ obj-$(CONFIG_MMC_SDHCI)               += sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)    += sdhci-pci.o
 obj-$(CONFIG_MMC_SDHCI_PLTFM)  += sdhci-pltfm.o
 obj-$(CONFIG_MMC_SDHCI_S3C)    += sdhci-s3c.o
+obj-$(CONFIG_MMC_SDHCI_SPEAR)  += sdhci-spear.o
 obj-$(CONFIG_MMC_WBSD)         += wbsd.o
 obj-$(CONFIG_MMC_AU1X)         += au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)         += omap.o
@@ -34,6 +35,7 @@ obj-$(CONFIG_MMC_TMIO)                += tmio_mmc.o
 obj-$(CONFIG_MMC_CB710)        += cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)    += via-sdmmc.o
 obj-$(CONFIG_SDH_BFIN)         += bfin_sdh.o
+obj-$(CONFIG_MMC_SH_MMCIF)     += sh_mmcif.o
 
 obj-$(CONFIG_MMC_SDHCI_OF)     += sdhci-of.o
 sdhci-of-y                             := sdhci-of-core.o
index 336d9f5..5f3a599 100644 (file)
@@ -1157,7 +1157,7 @@ static int at91_mci_suspend(struct platform_device *pdev, pm_message_t state)
                enable_irq_wake(host->board->det_pin);
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
 
        return ret;
 }
index df0e8a8..95ef864 100644 (file)
@@ -173,6 +173,7 @@ struct atmel_mci {
  * @mmc: The mmc_host representing this slot.
  * @host: The MMC controller this slot is using.
  * @sdc_reg: Value of SDCR to be written before using this slot.
+ * @sdio_irq: SDIO irq mask for this slot.
  * @mrq: mmc_request currently being processed or waiting to be
  *     processed, or NULL when the slot is idle.
  * @queue_node: List node for placing this node in the @queue list of
@@ -191,6 +192,7 @@ struct atmel_mci_slot {
        struct atmel_mci        *host;
 
        u32                     sdc_reg;
+       u32                     sdio_irq;
 
        struct mmc_request      *mrq;
        struct list_head        queue_node;
@@ -792,7 +794,7 @@ static void atmci_start_request(struct atmel_mci *host,
        mci_writel(host, SDCR, slot->sdc_reg);
 
        iflags = mci_readl(host, IMR);
-       if (iflags)
+       if (iflags & ~(MCI_SDIOIRQA | MCI_SDIOIRQB))
                dev_warn(&slot->mmc->class_dev, "WARNING: IMR=0x%08x\n",
                                iflags);
 
@@ -952,10 +954,21 @@ static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                if (mci_has_rwproof())
                        host->mode_reg |= (MCI_MR_WRPROOF | MCI_MR_RDPROOF);
 
-               if (list_empty(&host->queue))
+               if (atmci_is_mci2()) {
+                       /* set up high speed mode according to the card's capability */
+                       if (ios->timing == MMC_TIMING_SD_HS)
+                               host->cfg_reg |= MCI_CFG_HSMODE;
+                       else
+                               host->cfg_reg &= ~MCI_CFG_HSMODE;
+               }
+
+               if (list_empty(&host->queue)) {
                        mci_writel(host, MR, host->mode_reg);
-               else
+                       if (atmci_is_mci2())
+                               mci_writel(host, CFG, host->cfg_reg);
+               } else {
                        host->need_clock_update = true;
+               }
 
                spin_unlock_bh(&host->lock);
        } else {
@@ -1030,11 +1043,23 @@ static int atmci_get_cd(struct mmc_host *mmc)
        return present;
 }
 
+static void atmci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+       struct atmel_mci_slot   *slot = mmc_priv(mmc);
+       struct atmel_mci        *host = slot->host;
+
+       if (enable)
+               mci_writel(host, IER, slot->sdio_irq);
+       else
+               mci_writel(host, IDR, slot->sdio_irq);
+}
+
 static const struct mmc_host_ops atmci_ops = {
        .request        = atmci_request,
        .set_ios        = atmci_set_ios,
        .get_ro         = atmci_get_ro,
        .get_cd         = atmci_get_cd,
+       .enable_sdio_irq = atmci_enable_sdio_irq,
 };
 
 /* Called with host->lock held */
@@ -1052,8 +1077,11 @@ static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq)
         * necessary if set_ios() is called when a different slot is
         * busy transferring data.
         */
-       if (host->need_clock_update)
+       if (host->need_clock_update) {
                mci_writel(host, MR, host->mode_reg);
+               if (atmci_is_mci2())
+                       mci_writel(host, CFG, host->cfg_reg);
+       }
 
        host->cur_slot->mrq = NULL;
        host->mrq = NULL;
@@ -1483,6 +1511,19 @@ static void atmci_cmd_interrupt(struct atmel_mci *host, u32 status)
        tasklet_schedule(&host->tasklet);
 }
 
+static void atmci_sdio_interrupt(struct atmel_mci *host, u32 status)
+{
+       int     i;
+
+       for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+               struct atmel_mci_slot *slot = host->slot[i];
+               if (slot && (status & slot->sdio_irq)) {
+                       mmc_signal_sdio_irq(slot->mmc);
+               }
+       }
+}
+
+
 static irqreturn_t atmci_interrupt(int irq, void *dev_id)
 {
        struct atmel_mci        *host = dev_id;
@@ -1522,6 +1563,10 @@ static irqreturn_t atmci_interrupt(int irq, void *dev_id)
 
                if (pending & MCI_CMDRDY)
                        atmci_cmd_interrupt(host, status);
+
+               if (pending & (MCI_SDIOIRQA | MCI_SDIOIRQB))
+                       atmci_sdio_interrupt(host, status);
+
        } while (pass_count++ < 5);
 
        return pass_count ? IRQ_HANDLED : IRQ_NONE;
@@ -1544,7 +1589,7 @@ static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id)
 
 static int __init atmci_init_slot(struct atmel_mci *host,
                struct mci_slot_pdata *slot_data, unsigned int id,
-               u32 sdc_reg)
+               u32 sdc_reg, u32 sdio_irq)
 {
        struct mmc_host                 *mmc;
        struct atmel_mci_slot           *slot;
@@ -1560,11 +1605,16 @@ static int __init atmci_init_slot(struct atmel_mci *host,
        slot->wp_pin = slot_data->wp_pin;
        slot->detect_is_active_high = slot_data->detect_is_active_high;
        slot->sdc_reg = sdc_reg;
+       slot->sdio_irq = sdio_irq;
 
        mmc->ops = &atmci_ops;
        mmc->f_min = DIV_ROUND_UP(host->bus_hz, 512);
        mmc->f_max = host->bus_hz / 2;
        mmc->ocr_avail  = MMC_VDD_32_33 | MMC_VDD_33_34;
+       if (sdio_irq)
+               mmc->caps |= MMC_CAP_SDIO_IRQ;
+       if (atmci_is_mci2())
+               mmc->caps |= MMC_CAP_SD_HIGHSPEED;
        if (slot_data->bus_width >= 4)
                mmc->caps |= MMC_CAP_4_BIT_DATA;
 
@@ -1753,13 +1803,13 @@ static int __init atmci_probe(struct platform_device *pdev)
        ret = -ENODEV;
        if (pdata->slot[0].bus_width) {
                ret = atmci_init_slot(host, &pdata->slot[0],
-                               0, MCI_SDCSEL_SLOT_A);
+                               0, MCI_SDCSEL_SLOT_A, MCI_SDIOIRQA);
                if (!ret)
                        nr_slots++;
        }
        if (pdata->slot[1].bus_width) {
                ret = atmci_init_slot(host, &pdata->slot[1],
-                               1, MCI_SDCSEL_SLOT_B);
+                               1, MCI_SDCSEL_SLOT_B, MCI_SDIOIRQB);
                if (!ret)
                        nr_slots++;
        }
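With MMC_CAP_SDIO_IRQ advertised and .enable_sdio_irq wired up, the MMC core can unmask the per-slot SDIO interrupt on behalf of card drivers. A hedged sketch of the consumer side, with hypothetical names; sdio_claim_irq() is the stock core API that ends up exercising the host's enable_sdio_irq hook:

        #include <linux/mmc/sdio_func.h>

        static void foo_card_irq(struct sdio_func *func)
        {
                /* read and handle the card's interrupt status */
        }

        static int foo_probe(struct sdio_func *func)
        {
                int ret;

                sdio_claim_host(func);
                ret = sdio_claim_irq(func, foo_card_irq); /* arms the host hook */
                sdio_release_host(func);

                return ret;
        }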
index f583444..c8da5d3 100644 (file)
@@ -1142,7 +1142,7 @@ static int au1xmmc_suspend(struct platform_device *pdev, pm_message_t state)
        struct au1xmmc_host *host = platform_get_drvdata(pdev);
        int ret;
 
-       ret = mmc_suspend_host(host->mmc, state);
+       ret = mmc_suspend_host(host->mmc);
        if (ret)
                return ret;
 
index 6919e84..4b0e677 100644 (file)
@@ -576,7 +576,7 @@ static int sdh_suspend(struct platform_device *dev, pm_message_t state)
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
 
        bfin_write_SDH_PWR_CTL(bfin_read_SDH_PWR_CTL() & ~PWR_ON);
        peripheral_free_list(drv_data->pin_req);
index 92a324f..ca3bdc8 100644 (file)
@@ -675,7 +675,7 @@ static int cb710_mmc_suspend(struct platform_device *pdev, pm_message_t state)
        struct mmc_host *mmc = cb710_slot_to_mmc(slot);
        int err;
 
-       err = mmc_suspend_host(mmc, state);
+       err = mmc_suspend_host(mmc);
        if (err)
                return err;
 
index 3bd0ba2..33d9f1b 100644 (file)
 
 /*
  * One scatterlist dma "segment" is at most MAX_CCNT rw_threshold units,
- * and we handle up to NR_SG segments.  MMC_BLOCK_BOUNCE kicks in only
+ * and we handle up to MAX_NR_SG segments.  MMC_BLOCK_BOUNCE kicks in only
  * for drivers with max_hw_segs == 1, making the segments bigger (64KB)
- * than the page or two that's otherwise typical.  NR_SG == 16 gives at
- * least the same throughput boost, using EDMA transfer linkage instead
- * of spending CPU time copying pages.
+ * than the page or two that's otherwise typical. nr_sg (passed from
+ * platform data) == 16 gives at least the same throughput boost, using
+ * EDMA transfer linkage instead of spending CPU time copying pages.
  */
 #define MAX_CCNT       ((1 << 16) - 1)
 
-#define NR_SG          16
+#define MAX_NR_SG      16
 
 static unsigned rw_threshold = 32;
 module_param(rw_threshold, uint, S_IRUGO);
@@ -171,6 +171,7 @@ struct mmc_davinci_host {
 #define DAVINCI_MMC_DATADIR_READ       1
 #define DAVINCI_MMC_DATADIR_WRITE      2
        unsigned char data_dir;
+       unsigned char suspended;
 
        /* buffer is used during PIO of one scatterlist segment, and
         * is updated along with buffer_bytes_left.  bytes_left applies
@@ -192,7 +193,7 @@ struct mmc_davinci_host {
        struct edmacc_param     tx_template;
        struct edmacc_param     rx_template;
        unsigned                n_link;
-       u32                     links[NR_SG - 1];
+       u32                     links[MAX_NR_SG - 1];
 
        /* For PIO we walk scatterlists one segment at a time. */
        unsigned int            sg_len;
@@ -202,6 +203,8 @@ struct mmc_davinci_host {
        u8 version;
        /* for ns in one cycle calculation */
        unsigned ns_in_one_cycle;
+       /* Number of sg segments */
+       u8 nr_sg;
 #ifdef CONFIG_CPU_FREQ
        struct notifier_block   freq_transition;
 #endif
@@ -568,6 +571,7 @@ davinci_release_dma_channels(struct mmc_davinci_host *host)
 
 static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host)
 {
+       u32 link_size;
        int r, i;
 
        /* Acquire master DMA write channel */
@@ -593,7 +597,8 @@ static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host)
        /* Allocate parameter RAM slots, which will later be bound to a
         * channel as needed to handle a scatterlist.
         */
-       for (i = 0; i < ARRAY_SIZE(host->links); i++) {
+       link_size = min_t(unsigned, host->nr_sg, ARRAY_SIZE(host->links));
+       for (i = 0; i < link_size; i++) {
                r = edma_alloc_slot(EDMA_CTLR(host->txdma), EDMA_SLOT_ANY);
                if (r < 0) {
                        dev_dbg(mmc_dev(host->mmc), "dma PaRAM alloc --> %d\n",
@@ -905,19 +910,26 @@ static void mmc_davinci_cmd_done(struct mmc_davinci_host *host,
        }
 }
 
-static void
-davinci_abort_data(struct mmc_davinci_host *host, struct mmc_data *data)
+static inline void mmc_davinci_reset_ctrl(struct mmc_davinci_host *host,
+                                                               int val)
 {
        u32 temp;
 
-       /* reset command and data state machines */
        temp = readl(host->base + DAVINCI_MMCCTL);
-       writel(temp | MMCCTL_CMDRST | MMCCTL_DATRST,
-               host->base + DAVINCI_MMCCTL);
+       if (val)        /* reset */
+               temp |= MMCCTL_CMDRST | MMCCTL_DATRST;
+       else            /* enable */
+               temp &= ~(MMCCTL_CMDRST | MMCCTL_DATRST);
 
-       temp &= ~(MMCCTL_CMDRST | MMCCTL_DATRST);
-       udelay(10);
        writel(temp, host->base + DAVINCI_MMCCTL);
+       udelay(10);
+}
+
+static void
+davinci_abort_data(struct mmc_davinci_host *host, struct mmc_data *data)
+{
+       mmc_davinci_reset_ctrl(host, 1);
+       mmc_davinci_reset_ctrl(host, 0);
 }
 
 static irqreturn_t mmc_davinci_irq(int irq, void *dev_id)
@@ -1121,15 +1133,8 @@ static inline void mmc_davinci_cpufreq_deregister(struct mmc_davinci_host *host)
 #endif
 static void __init init_mmcsd_host(struct mmc_davinci_host *host)
 {
-       /* DAT line portion is diabled and in reset state */
-       writel(readl(host->base + DAVINCI_MMCCTL) | MMCCTL_DATRST,
-               host->base + DAVINCI_MMCCTL);
-
-       /* CMD line portion is diabled and in reset state */
-       writel(readl(host->base + DAVINCI_MMCCTL) | MMCCTL_CMDRST,
-               host->base + DAVINCI_MMCCTL);
 
-       udelay(10);
+       mmc_davinci_reset_ctrl(host, 1);
 
        writel(0, host->base + DAVINCI_MMCCLK);
        writel(MMCCLK_CLKEN, host->base + DAVINCI_MMCCLK);
@@ -1137,12 +1142,7 @@ static void __init init_mmcsd_host(struct mmc_davinci_host *host)
        writel(0x1FFF, host->base + DAVINCI_MMCTOR);
        writel(0xFFFF, host->base + DAVINCI_MMCTOD);
 
-       writel(readl(host->base + DAVINCI_MMCCTL) & ~MMCCTL_DATRST,
-               host->base + DAVINCI_MMCCTL);
-       writel(readl(host->base + DAVINCI_MMCCTL) & ~MMCCTL_CMDRST,
-               host->base + DAVINCI_MMCCTL);
-
-       udelay(10);
+       mmc_davinci_reset_ctrl(host, 0);
 }
 
 static int __init davinci_mmcsd_probe(struct platform_device *pdev)
@@ -1202,6 +1202,12 @@ static int __init davinci_mmcsd_probe(struct platform_device *pdev)
 
        init_mmcsd_host(host);
 
+       if (pdata->nr_sg)
+               host->nr_sg = pdata->nr_sg - 1;
+
+       if (host->nr_sg > MAX_NR_SG || !host->nr_sg)
+               host->nr_sg = MAX_NR_SG;
+
        host->use_dma = use_dma;
        host->irq = irq;
 
@@ -1327,32 +1333,65 @@ static int __exit davinci_mmcsd_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int davinci_mmcsd_suspend(struct platform_device *pdev, pm_message_t msg)
+static int davinci_mmcsd_suspend(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct mmc_davinci_host *host = platform_get_drvdata(pdev);
+       int ret;
 
-       return mmc_suspend_host(host->mmc, msg);
+       mmc_host_enable(host->mmc);
+       ret = mmc_suspend_host(host->mmc);
+       if (!ret) {
+               writel(0, host->base + DAVINCI_MMCIM);
+               mmc_davinci_reset_ctrl(host, 1);
+               mmc_host_disable(host->mmc);
+               clk_disable(host->clk);
+               host->suspended = 1;
+       } else {
+               host->suspended = 0;
+               mmc_host_disable(host->mmc);
+       }
+
+       return ret;
 }
 
-static int davinci_mmcsd_resume(struct platform_device *pdev)
+static int davinci_mmcsd_resume(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct mmc_davinci_host *host = platform_get_drvdata(pdev);
+       int ret;
+
+       if (!host->suspended)
+               return 0;
 
-       return mmc_resume_host(host->mmc);
+       clk_enable(host->clk);
+       mmc_host_enable(host->mmc);
+
+       mmc_davinci_reset_ctrl(host, 0);
+       ret = mmc_resume_host(host->mmc);
+       if (!ret)
+               host->suspended = 0;
+
+       return ret;
 }
+
+static const struct dev_pm_ops davinci_mmcsd_pm = {
+       .suspend        = davinci_mmcsd_suspend,
+       .resume         = davinci_mmcsd_resume,
+};
+
+#define davinci_mmcsd_pm_ops (&davinci_mmcsd_pm)
 #else
-#define davinci_mmcsd_suspend  NULL
-#define davinci_mmcsd_resume   NULL
+#define davinci_mmcsd_pm_ops NULL
 #endif
 
 static struct platform_driver davinci_mmcsd_driver = {
        .driver         = {
                .name   = "davinci_mmc",
                .owner  = THIS_MODULE,
+               .pm     = davinci_mmcsd_pm_ops,
        },
        .remove         = __exit_p(davinci_mmcsd_remove),
-       .suspend        = davinci_mmcsd_suspend,
-       .resume         = davinci_mmcsd_resume,
 };
 
 static int __init davinci_mmcsd_init(void)
index bf98d7c..9a68ff4 100644 (file)
@@ -1115,7 +1115,7 @@ static int imxmci_suspend(struct platform_device *dev, pm_message_t state)
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
 
        return ret;
 }
index ff115d9..4917af9 100644 (file)
@@ -824,7 +824,7 @@ static int mmci_suspend(struct amba_device *dev, pm_message_t state)
        if (mmc) {
                struct mmci_host *host = mmc_priv(mmc);
 
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
                if (ret == 0)
                        writel(0, host->base + MMCIMASK0);
        }
index 61f1d27..24e0945 100644 (file)
@@ -1327,7 +1327,7 @@ msmsdcc_suspend(struct platform_device *dev, pm_message_t state)
                        disable_irq(host->stat_irq);
 
                if (mmc->card && mmc->card->type != MMC_TYPE_SDIO)
-                       rc = mmc_suspend_host(mmc, state);
+                       rc = mmc_suspend_host(mmc);
                if (!rc)
                        msmsdcc_writel(host, 0, MMCIMASK0);
                if (host->clks_on)
index 34e2348..366eefa 100644 (file)
@@ -865,7 +865,7 @@ static int mvsd_suspend(struct platform_device *dev, pm_message_t state)
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
 
        return ret;
 }
index ec18e3b..d9d4a72 100644 (file)
@@ -932,7 +932,7 @@ static int mxcmci_suspend(struct platform_device *dev, pm_message_t state)
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc);
 
        return ret;
 }
index 84d2804..2b28168 100644 (file)
 #include <plat/fpga.h>
 
 #define        OMAP_MMC_REG_CMD        0x00
-#define        OMAP_MMC_REG_ARGL       0x04
-#define        OMAP_MMC_REG_ARGH       0x08
-#define        OMAP_MMC_REG_CON        0x0c
-#define        OMAP_MMC_REG_STAT       0x10
-#define        OMAP_MMC_REG_IE         0x14
-#define        OMAP_MMC_REG_CTO        0x18
-#define        OMAP_MMC_REG_DTO        0x1c
-#define        OMAP_MMC_REG_DATA       0x20
-#define        OMAP_MMC_REG_BLEN       0x24
-#define        OMAP_MMC_REG_NBLK       0x28
-#define        OMAP_MMC_REG_BUF        0x2c
-#define OMAP_MMC_REG_SDIO      0x34
-#define        OMAP_MMC_REG_REV        0x3c
-#define        OMAP_MMC_REG_RSP0       0x40
-#define        OMAP_MMC_REG_RSP1       0x44
-#define        OMAP_MMC_REG_RSP2       0x48
-#define        OMAP_MMC_REG_RSP3       0x4c
-#define        OMAP_MMC_REG_RSP4       0x50
-#define        OMAP_MMC_REG_RSP5       0x54
-#define        OMAP_MMC_REG_RSP6       0x58
-#define        OMAP_MMC_REG_RSP7       0x5c
-#define        OMAP_MMC_REG_IOSR       0x60
-#define        OMAP_MMC_REG_SYSC       0x64
-#define        OMAP_MMC_REG_SYSS       0x68
+#define        OMAP_MMC_REG_ARGL       0x01
+#define        OMAP_MMC_REG_ARGH       0x02
+#define        OMAP_MMC_REG_CON        0x03
+#define        OMAP_MMC_REG_STAT       0x04
+#define        OMAP_MMC_REG_IE         0x05
+#define        OMAP_MMC_REG_CTO        0x06
+#define        OMAP_MMC_REG_DTO        0x07
+#define        OMAP_MMC_REG_DATA       0x08
+#define        OMAP_MMC_REG_BLEN       0x09
+#define        OMAP_MMC_REG_NBLK       0x0a
+#define        OMAP_MMC_REG_BUF        0x0b
+#define        OMAP_MMC_REG_SDIO       0x0d
+#define        OMAP_MMC_REG_REV        0x0f
+#define        OMAP_MMC_REG_RSP0       0x10
+#define        OMAP_MMC_REG_RSP1       0x11
+#define        OMAP_MMC_REG_RSP2       0x12
+#define        OMAP_MMC_REG_RSP3       0x13
+#define        OMAP_MMC_REG_RSP4       0x14
+#define        OMAP_MMC_REG_RSP5       0x15
+#define        OMAP_MMC_REG_RSP6       0x16
+#define        OMAP_MMC_REG_RSP7       0x17
+#define        OMAP_MMC_REG_IOSR       0x18
+#define        OMAP_MMC_REG_SYSC       0x19
+#define        OMAP_MMC_REG_SYSS       0x1a
 
 #define        OMAP_MMC_STAT_CARD_ERR          (1 << 14)
 #define        OMAP_MMC_STAT_CARD_IRQ          (1 << 13)
@@ -78,8 +78,9 @@
 #define        OMAP_MMC_STAT_CARD_BUSY         (1 <<  2)
 #define        OMAP_MMC_STAT_END_OF_CMD        (1 <<  0)
 
-#define OMAP_MMC_READ(host, reg)       __raw_readw((host)->virt_base + OMAP_MMC_REG_##reg)
-#define OMAP_MMC_WRITE(host, reg, val) __raw_writew((val), (host)->virt_base + OMAP_MMC_REG_##reg)
+#define OMAP_MMC_REG(host, reg)                (OMAP_MMC_REG_##reg << (host)->reg_shift)
+#define OMAP_MMC_READ(host, reg)       __raw_readw((host)->virt_base + OMAP_MMC_REG(host, reg))
+#define OMAP_MMC_WRITE(host, reg, val) __raw_writew((val), (host)->virt_base + OMAP_MMC_REG(host, reg))
 
 /*
  * Command types
@@ -133,6 +134,7 @@ struct mmc_omap_host {
        int                     irq;
        unsigned char           bus_mode;
        unsigned char           hw_bus_mode;
+       unsigned int            reg_shift;
 
        struct work_struct      cmd_abort_work;
        unsigned                abort:1;
@@ -680,9 +682,9 @@ mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
        host->data->bytes_xfered += n;
 
        if (write) {
-               __raw_writesw(host->virt_base + OMAP_MMC_REG_DATA, host->buffer, n);
+               __raw_writesw(host->virt_base + OMAP_MMC_REG(host, DATA), host->buffer, n);
        } else {
-               __raw_readsw(host->virt_base + OMAP_MMC_REG_DATA, host->buffer, n);
+               __raw_readsw(host->virt_base + OMAP_MMC_REG(host, DATA), host->buffer, n);
        }
 }
 
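The register table above is now a list of indices rather than byte offsets because OMAP7xx packs the MMC registers 2 bytes apart while later OMAPs use 4-byte spacing; reg_shift (set near the end of probe below) scales an index back to a byte offset. A small sketch of the resulting arithmetic, reusing the CON value from the hunk:

        #define OMAP_MMC_REG_CON        0x03    /* index, from the table above */

        /* 0x03 << 1 == 0x06 on OMAP7xx; 0x03 << 2 == 0x0c elsewhere,
         * matching the old hard-coded byte offset. */
        static unsigned int con_offset(unsigned int reg_shift)
        {
                return OMAP_MMC_REG_CON << reg_shift;
        }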
@@ -900,7 +902,7 @@ mmc_omap_prepare_dma(struct mmc_omap_host *host, struct mmc_data *data)
        int dst_port = 0;
        int sync_dev = 0;
 
-       data_addr = host->phys_base + OMAP_MMC_REG_DATA;
+       data_addr = host->phys_base + OMAP_MMC_REG(host, DATA);
        frame = data->blksz;
        count = sg_dma_len(sg);
 
@@ -1493,6 +1495,8 @@ static int __init mmc_omap_probe(struct platform_device *pdev)
                }
        }
 
+       host->reg_shift = (cpu_is_omap7xx() ? 1 : 2);
+
        return 0;
 
 err_plat_cleanup:
@@ -1557,7 +1561,7 @@ static int mmc_omap_suspend(struct platform_device *pdev, pm_message_t mesg)
                struct mmc_omap_slot *slot;
 
                slot = host->slots[i];
-               ret = mmc_suspend_host(slot->mmc, mesg);
+               ret = mmc_suspend_host(slot->mmc);
                if (ret < 0) {
                        while (--i >= 0) {
                                slot = host->slots[i];
index e9caf69..b032828 100644 (file)
@@ -157,12 +157,10 @@ struct omap_hsmmc_host {
         */
        struct  regulator       *vcc;
        struct  regulator       *vcc_aux;
-       struct  semaphore       sem;
        struct  work_struct     mmc_carddetect_work;
        void    __iomem         *base;
        resource_size_t         mapbase;
        spinlock_t              irq_lock; /* Prevent races with irq handler */
-       unsigned long           flags;
        unsigned int            id;
        unsigned int            dma_len;
        unsigned int            dma_sg_idx;
@@ -183,6 +181,7 @@ struct omap_hsmmc_host {
        int                     protect_card;
        int                     reqs_blocked;
        int                     use_reg;
+       int                     req_in_progress;
 
        struct  omap_mmc_platform_data  *pdata;
 };
@@ -524,6 +523,27 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
                dev_dbg(mmc_dev(host->mmc), "MMC Clock is not stoped\n");
 }
 
+static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host)
+{
+       unsigned int irq_mask;
+
+       if (host->use_dma)
+               irq_mask = INT_EN_MASK & ~(BRR_ENABLE | BWR_ENABLE);
+       else
+               irq_mask = INT_EN_MASK;
+
+       OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+       OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+       OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+}
+
+static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
+{
+       OMAP_HSMMC_WRITE(host->base, ISE, 0);
+       OMAP_HSMMC_WRITE(host->base, IE, 0);
+       OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+}
+
 #ifdef CONFIG_PM
 
 /*
@@ -592,9 +612,7 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
                && time_before(jiffies, timeout))
                ;
 
-       OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
-       OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
-       OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
+       omap_hsmmc_disable_irq(host);
 
        /* Do not initialize card-specific things if the power is off */
        if (host->power_mode == MMC_POWER_OFF)
@@ -697,6 +715,8 @@ static void send_init_stream(struct omap_hsmmc_host *host)
                return;
 
        disable_irq(host->irq);
+
+       OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
        OMAP_HSMMC_WRITE(host->base, CON,
                OMAP_HSMMC_READ(host->base, CON) | INIT_STREAM);
        OMAP_HSMMC_WRITE(host->base, CMD, INIT_STREAM_CMD);
@@ -762,17 +782,7 @@ omap_hsmmc_start_command(struct omap_hsmmc_host *host, struct mmc_command *cmd,
                mmc_hostname(host->mmc), cmd->opcode, cmd->arg);
        host->cmd = cmd;
 
-       /*
-        * Clear status bits and enable interrupts
-        */
-       OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
-       OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
-
-       if (host->use_dma)
-               OMAP_HSMMC_WRITE(host->base, IE,
-                                INT_EN_MASK & ~(BRR_ENABLE | BWR_ENABLE));
-       else
-               OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
+       omap_hsmmc_enable_irq(host);
 
        host->response_busy = 0;
        if (cmd->flags & MMC_RSP_PRESENT) {
@@ -806,13 +816,7 @@ omap_hsmmc_start_command(struct omap_hsmmc_host *host, struct mmc_command *cmd,
        if (host->use_dma)
                cmdreg |= DMA_EN;
 
-       /*
-        * In an interrupt context (i.e. STOP command), the spinlock is unlocked
-        * by the interrupt handler, otherwise (i.e. for a new request) it is
-        * unlocked here.
-        */
-       if (!in_interrupt())
-               spin_unlock_irqrestore(&host->irq_lock, host->flags);
+       host->req_in_progress = 1;
 
        OMAP_HSMMC_WRITE(host->base, ARG, cmd->arg);
        OMAP_HSMMC_WRITE(host->base, CMD, cmdreg);
@@ -827,6 +831,23 @@ omap_hsmmc_get_dma_dir(struct omap_hsmmc_host *host, struct mmc_data *data)
                return DMA_FROM_DEVICE;
 }
 
+static void omap_hsmmc_request_done(struct omap_hsmmc_host *host, struct mmc_request *mrq)
+{
+       int dma_ch;
+
+       spin_lock(&host->irq_lock);
+       host->req_in_progress = 0;
+       dma_ch = host->dma_ch;
+       spin_unlock(&host->irq_lock);
+
+       omap_hsmmc_disable_irq(host);
+       /* Do not complete the request if DMA is still in progress */
+       if (mrq->data && host->use_dma && dma_ch != -1)
+               return;
+       host->mrq = NULL;
+       mmc_request_done(host->mmc, mrq);
+}
+
 /*
  * Notify the transfer complete to MMC core
  */
@@ -843,25 +864,19 @@ omap_hsmmc_xfer_done(struct omap_hsmmc_host *host, struct mmc_data *data)
                        return;
                }
 
-               host->mrq = NULL;
-               mmc_request_done(host->mmc, mrq);
+               omap_hsmmc_request_done(host, mrq);
                return;
        }
 
        host->data = NULL;
 
-       if (host->use_dma && host->dma_ch != -1)
-               dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_len,
-                       omap_hsmmc_get_dma_dir(host, data));
-
        if (!data->error)
                data->bytes_xfered += data->blocks * (data->blksz);
        else
                data->bytes_xfered = 0;
 
        if (!data->stop) {
-               host->mrq = NULL;
-               mmc_request_done(host->mmc, data->mrq);
+               omap_hsmmc_request_done(host, data->mrq);
                return;
        }
        omap_hsmmc_start_command(host, data->stop, NULL);
@@ -887,10 +902,8 @@ omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd)
                        cmd->resp[0] = OMAP_HSMMC_READ(host->base, RSP10);
                }
        }
-       if ((host->data == NULL && !host->response_busy) || cmd->error) {
-               host->mrq = NULL;
-               mmc_request_done(host->mmc, cmd->mrq);
-       }
+       if ((host->data == NULL && !host->response_busy) || cmd->error)
+               omap_hsmmc_request_done(host, cmd->mrq);
 }
 
 /*
@@ -898,14 +911,19 @@ omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd)
  */
 static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
 {
+       int dma_ch;
+
        host->data->error = errno;
 
-       if (host->use_dma && host->dma_ch != -1) {
+       spin_lock(&host->irq_lock);
+       dma_ch = host->dma_ch;
+       host->dma_ch = -1;
+       spin_unlock(&host->irq_lock);
+
+       if (host->use_dma && dma_ch != -1) {
                dma_unmap_sg(mmc_dev(host->mmc), host->data->sg, host->dma_len,
                        omap_hsmmc_get_dma_dir(host, host->data));
-               omap_free_dma(host->dma_ch);
-               host->dma_ch = -1;
-               up(&host->sem);
+               omap_free_dma(dma_ch);
        }
        host->data = NULL;
 }
@@ -967,28 +985,21 @@ static inline void omap_hsmmc_reset_controller_fsm(struct omap_hsmmc_host *host,
                        __func__);
 }
 
-/*
- * MMC controller IRQ handler
- */
-static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
+static void omap_hsmmc_do_irq(struct omap_hsmmc_host *host, int status)
 {
-       struct omap_hsmmc_host *host = dev_id;
        struct mmc_data *data;
-       int end_cmd = 0, end_trans = 0, status;
-
-       spin_lock(&host->irq_lock);
-
-       if (host->mrq == NULL) {
-               OMAP_HSMMC_WRITE(host->base, STAT,
-                       OMAP_HSMMC_READ(host->base, STAT));
-               /* Flush posted write */
-               OMAP_HSMMC_READ(host->base, STAT);
-               spin_unlock(&host->irq_lock);
-               return IRQ_HANDLED;
+       int end_cmd = 0, end_trans = 0;
+
+       if (!host->req_in_progress) {
+               do {
+                       OMAP_HSMMC_WRITE(host->base, STAT, status);
+                       /* Flush posted write */
+                       status = OMAP_HSMMC_READ(host->base, STAT);
+               } while (status & INT_EN_MASK);
+               return;
        }
 
        data = host->data;
-       status = OMAP_HSMMC_READ(host->base, STAT);
        dev_dbg(mmc_dev(host->mmc), "IRQ Status is %x\n", status);
 
        if (status & ERR) {
@@ -1041,15 +1052,27 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
        }
 
        OMAP_HSMMC_WRITE(host->base, STAT, status);
-       /* Flush posted write */
-       OMAP_HSMMC_READ(host->base, STAT);
 
        if (end_cmd || ((status & CC) && host->cmd))
                omap_hsmmc_cmd_done(host, host->cmd);
        if ((end_trans || (status & TC)) && host->mrq)
                omap_hsmmc_xfer_done(host, data);
+}
 
-       spin_unlock(&host->irq_lock);
+/*
+ * MMC controller IRQ handler
+ */
+static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
+{
+       struct omap_hsmmc_host *host = dev_id;
+       int status;
+
+       status = OMAP_HSMMC_READ(host->base, STAT);
+       do {
+               omap_hsmmc_do_irq(host, status);
+               /* Flush posted write */
+               status = OMAP_HSMMC_READ(host->base, STAT);
+       } while (status & INT_EN_MASK);
 
        return IRQ_HANDLED;
 }
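The rework moves all event processing into omap_hsmmc_do_irq() and drops the irq_lock from the fast path: the top-level handler re-reads STAT after each pass, both to flush the posted write that acknowledges the events and to pick up any interrupt raised in the meantime, looping until no enabled status bit remains.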
@@ -1244,31 +1267,47 @@ static void omap_hsmmc_config_dma_params(struct omap_hsmmc_host *host,
 /*
  * DMA call back function
  */
-static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *data)
+static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 {
-       struct omap_hsmmc_host *host = data;
+       struct omap_hsmmc_host *host = cb_data;
+       struct mmc_data *data = host->mrq->data;
+       int dma_ch, req_in_progress;
 
        if (ch_status & OMAP2_DMA_MISALIGNED_ERR_IRQ)
                dev_dbg(mmc_dev(host->mmc), "MISALIGNED_ADRS_ERR\n");
 
-       if (host->dma_ch < 0)
+       spin_lock(&host->irq_lock);
+       if (host->dma_ch < 0) {
+               spin_unlock(&host->irq_lock);
                return;
+       }
 
        host->dma_sg_idx++;
        if (host->dma_sg_idx < host->dma_len) {
                /* Fire up the next transfer. */
-               omap_hsmmc_config_dma_params(host, host->data,
-                                          host->data->sg + host->dma_sg_idx);
+               omap_hsmmc_config_dma_params(host, data,
+                                          data->sg + host->dma_sg_idx);
+               spin_unlock(&host->irq_lock);
                return;
        }
 
-       omap_free_dma(host->dma_ch);
+       dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_len,
+               omap_hsmmc_get_dma_dir(host, data));
+
+       req_in_progress = host->req_in_progress;
+       dma_ch = host->dma_ch;
        host->dma_ch = -1;
-       /*
-        * DMA Callback: run in interrupt context.
-        * mutex_unlock will throw a kernel warning if used.
-        */
-       up(&host->sem);
+       spin_unlock(&host->irq_lock);
+
+       omap_free_dma(dma_ch);
+
+       /* If DMA has finished after TC, complete the request */
+       if (!req_in_progress) {
+               struct mmc_request *mrq = host->mrq;
+
+               host->mrq = NULL;
+               mmc_request_done(host->mmc, mrq);
+       }
 }
 
 /*
@@ -1277,7 +1316,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *data)
 static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host,
                                        struct mmc_request *req)
 {
-       int dma_ch = 0, ret = 0, err = 1, i;
+       int dma_ch = 0, ret = 0, i;
        struct mmc_data *data = req->data;
 
        /* Sanity check: all the SG entries must be aligned by block size. */
@@ -1294,23 +1333,7 @@ static int omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host,
                 */
                return -EINVAL;
 
-       /*
-        * If for some reason the DMA transfer is still active,
-        * we wait for timeout period and free the dma
-        */
-       if (host->dma_ch != -1) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(100);
-               if (down_trylock(&host->sem)) {
-                       omap_free_dma(host->dma_ch);
-                       host->dma_ch = -1;
-                       up(&host->sem);
-                       return err;
-               }
-       } else {
-               if (down_trylock(&host->sem))
-                       return err;
-       }
+       BUG_ON(host->dma_ch != -1);
 
        ret = omap_request_dma(omap_hsmmc_get_dma_sync_dev(host, data),
                               "MMC/SD", omap_hsmmc_dma_cb, host, &dma_ch);
@@ -1410,37 +1433,27 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
        struct omap_hsmmc_host *host = mmc_priv(mmc);
        int err;
 
-       /*
-        * Prevent races with the interrupt handler because of unexpected
-        * interrupts, but not if we are already in interrupt context i.e.
-        * retries.
-        */
-       if (!in_interrupt()) {
-               spin_lock_irqsave(&host->irq_lock, host->flags);
-               /*
-                * Protect the card from I/O if there is a possibility
-                * it can be removed.
-                */
-               if (host->protect_card) {
-                       if (host->reqs_blocked < 3) {
-                               /*
-                                * Ensure the controller is left in a consistent
-                                * state by resetting the command and data state
-                                * machines.
-                                */
-                               omap_hsmmc_reset_controller_fsm(host, SRD);
-                               omap_hsmmc_reset_controller_fsm(host, SRC);
-                               host->reqs_blocked += 1;
-                       }
-                       req->cmd->error = -EBADF;
-                       if (req->data)
-                               req->data->error = -EBADF;
-                       spin_unlock_irqrestore(&host->irq_lock, host->flags);
-                       mmc_request_done(mmc, req);
-                       return;
-               } else if (host->reqs_blocked)
-                       host->reqs_blocked = 0;
-       }
+       BUG_ON(host->req_in_progress);
+       BUG_ON(host->dma_ch != -1);
+       if (host->protect_card) {
+               if (host->reqs_blocked < 3) {
+                       /*
+                        * Ensure the controller is left in a consistent
+                        * state by resetting the command and data state
+                        * machines.
+                        */
+                       omap_hsmmc_reset_controller_fsm(host, SRD);
+                       omap_hsmmc_reset_controller_fsm(host, SRC);
+                       host->reqs_blocked += 1;
+               }
+               req->cmd->error = -EBADF;
+               if (req->data)
+                       req->data->error = -EBADF;
+               req->cmd->retries = 0;
+               mmc_request_done(mmc, req);
+               return;
+       } else if (host->reqs_blocked)
+               host->reqs_blocked = 0;
        WARN_ON(host->mrq != NULL);
        host->mrq = req;
        err = omap_hsmmc_prepare_data(host, req);
@@ -1449,8 +1462,6 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
                if (req->data)
                        req->data->error = err;
                host->mrq = NULL;
-               if (!in_interrupt())
-                       spin_unlock_irqrestore(&host->irq_lock, host->flags);
                mmc_request_done(mmc, req);
                return;
        }
@@ -2019,7 +2030,6 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
        mmc->f_min      = 400000;
        mmc->f_max      = 52000000;
 
-       sema_init(&host->sem, 1);
        spin_lock_init(&host->irq_lock);
 
        host->iclk = clk_get(&pdev->dev, "ick");
@@ -2162,8 +2172,7 @@ static int __init omap_hsmmc_probe(struct platform_device *pdev)
                }
        }
 
-       OMAP_HSMMC_WRITE(host->base, ISE, INT_EN_MASK);
-       OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
+       omap_hsmmc_disable_irq(host);
 
        mmc_host_lazy_disable(host->mmc);
 
@@ -2258,10 +2267,12 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int omap_hsmmc_suspend(struct platform_device *pdev, pm_message_t state)
+static int omap_hsmmc_suspend(struct device *dev)
 {
        int ret = 0;
+       struct platform_device *pdev = to_platform_device(dev);
        struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
+       pm_message_t state = PMSG_SUSPEND; /* unused by MMC core */
 
        if (host && host->suspended)
                return 0;
@@ -2281,12 +2292,9 @@ static int omap_hsmmc_suspend(struct platform_device *pdev, pm_message_t state)
                }
                cancel_work_sync(&host->mmc_carddetect_work);
                mmc_host_enable(host->mmc);
-               ret = mmc_suspend_host(host->mmc, state);
+               ret = mmc_suspend_host(host->mmc);
                if (ret == 0) {
-                       OMAP_HSMMC_WRITE(host->base, ISE, 0);
-                       OMAP_HSMMC_WRITE(host->base, IE, 0);
-
-
+                       omap_hsmmc_disable_irq(host);
                        OMAP_HSMMC_WRITE(host->base, HCTL,
                                OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
                        mmc_host_disable(host->mmc);
@@ -2310,9 +2318,10 @@ static int omap_hsmmc_suspend(struct platform_device *pdev, pm_message_t state)
 }
 
 /* Routine to resume the MMC device */
-static int omap_hsmmc_resume(struct platform_device *pdev)
+static int omap_hsmmc_resume(struct device *dev)
 {
        int ret = 0;
+       struct platform_device *pdev = to_platform_device(dev);
        struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
 
        if (host && !host->suspended)
@@ -2363,13 +2372,17 @@ clk_en_err:
 #define omap_hsmmc_resume              NULL
 #endif
 
-static struct platform_driver omap_hsmmc_driver = {
-       .remove         = omap_hsmmc_remove,
+static struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
        .suspend        = omap_hsmmc_suspend,
        .resume         = omap_hsmmc_resume,
+};
+
+static struct platform_driver omap_hsmmc_driver = {
+       .remove         = omap_hsmmc_remove,
        .driver         = {
                .name = DRIVER_NAME,
                .owner = THIS_MODULE,
+               .pm = &omap_hsmmc_dev_pm_ops,
        },
 };
 
index e4f00e7..0a4e43f 100644 (file)
@@ -813,7 +813,7 @@ static int pxamci_suspend(struct device *dev)
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, PMSG_SUSPEND);
+               ret = mmc_suspend_host(mmc);
 
        return ret;
 }
index 2fdf768..2e16e0a 100644 (file)
@@ -1881,9 +1881,8 @@ MODULE_DEVICE_TABLE(platform, s3cmci_driver_ids);
 static int s3cmci_suspend(struct device *dev)
 {
        struct mmc_host *mmc = platform_get_drvdata(to_platform_device(dev));
-       struct pm_message event = { PM_EVENT_SUSPEND };
 
-       return mmc_suspend_host(mmc, event);
+       return mmc_suspend_host(mmc);
 }
 
 static int s3cmci_resume(struct device *dev)
index 7802a54..a2e9820 100644 (file)
@@ -89,7 +89,7 @@ static int sdhci_of_suspend(struct of_device *ofdev, pm_message_t state)
 {
        struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
 
-       return mmc_suspend_host(host->mmc, state);
+       return mmc_suspend_host(host->mmc);
 }
 
 static int sdhci_of_resume(struct of_device *ofdev)
index d5b11a1..c8623de 100644 (file)
@@ -129,12 +129,12 @@ struct sdhci_of_data sdhci_esdhc = {
                  SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET |
                  SDHCI_QUIRK_NO_CARD_NO_RESET,
        .ops = {
-               .readl = sdhci_be32bs_readl,
-               .readw = esdhc_readw,
-               .readb = sdhci_be32bs_readb,
-               .writel = sdhci_be32bs_writel,
-               .writew = esdhc_writew,
-               .writeb = esdhc_writeb,
+               .read_l = sdhci_be32bs_readl,
+               .read_w = esdhc_readw,
+               .read_b = sdhci_be32bs_readb,
+               .write_l = sdhci_be32bs_writel,
+               .write_w = esdhc_writew,
+               .write_b = esdhc_writeb,
                .set_clock = esdhc_set_clock,
                .enable_dma = esdhc_enable_dma,
                .get_max_clock = esdhc_get_max_clock,
index 35117f3..68ddb75 100644 (file)
@@ -55,11 +55,11 @@ struct sdhci_of_data sdhci_hlwd = {
        .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR |
                  SDHCI_QUIRK_32BIT_DMA_SIZE,
        .ops = {
-               .readl = sdhci_be32bs_readl,
-               .readw = sdhci_be32bs_readw,
-               .readb = sdhci_be32bs_readb,
-               .writel = sdhci_hlwd_writel,
-               .writew = sdhci_hlwd_writew,
-               .writeb = sdhci_hlwd_writeb,
+               .read_l = sdhci_be32bs_readl,
+               .read_w = sdhci_be32bs_readw,
+               .read_b = sdhci_be32bs_readb,
+               .write_l = sdhci_hlwd_writel,
+               .write_w = sdhci_hlwd_writew,
+               .write_b = sdhci_hlwd_writeb,
        },
 };
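The old hook names could collide with architectures that implement readl()/readw()/readb() as preprocessor macros, which mangles designated initializers such as ".readl ="; the underscored names sidestep that. A sketch of the kind of wrapper sdhci can then build, assuming the usual I/O-accessor configuration:

        #include <linux/io.h>

        /* Sketch: dispatch through the renamed hook, fall back to raw MMIO. */
        static inline u32 example_sdhci_readl(struct sdhci_host *host, int reg)
        {
                if (host->ops->read_l)
                        return host->ops->read_l(host, reg);
                return readl(host->ioaddr + reg);
        }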
index 6701af6..65483fd 100644 (file)
@@ -628,7 +628,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
        host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pci_slot));
        if (IS_ERR(host)) {
                dev_err(&pdev->dev, "cannot allocate host\n");
-               return ERR_PTR(PTR_ERR(host));
+               return ERR_CAST(host);
        }
 
        slot = sdhci_priv(host);
index 297f40a..b6ee0d7 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/mmc/host.h>
 
 #include <linux/io.h>
+#include <linux/sdhci-pltfm.h>
 
 #include "sdhci.h"
 
@@ -49,19 +50,18 @@ static struct sdhci_ops sdhci_pltfm_ops = {
 
 static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
 {
+       struct sdhci_pltfm_data *pdata = pdev->dev.platform_data;
        struct sdhci_host *host;
        struct resource *iomem;
        int ret;
 
-       BUG_ON(pdev == NULL);
-
        iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!iomem) {
                ret = -ENOMEM;
                goto err;
        }
 
-       if (resource_size(iomem) != 0x100)
+       if (resource_size(iomem) < 0x100)
                dev_err(&pdev->dev, "Invalid iomem size. You may "
                        "experience problems.\n");
 
@@ -76,7 +76,12 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
        }
 
        host->hw_name = "platform";
-       host->ops = &sdhci_pltfm_ops;
+       if (pdata && pdata->ops)
+               host->ops = pdata->ops;
+       else
+               host->ops = &sdhci_pltfm_ops;
+       if (pdata)
+               host->quirks = pdata->quirks;
        host->irq = platform_get_irq(pdev, 0);
 
        if (!request_mem_region(iomem->start, resource_size(iomem),
@@ -93,6 +98,12 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
                goto err_remap;
        }
 
+       if (pdata && pdata->init) {
+               ret = pdata->init(host);
+               if (ret)
+                       goto err_plat_init;
+       }
+
        ret = sdhci_add_host(host);
        if (ret)
                goto err_add_host;
@@ -102,6 +113,9 @@ static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
        return 0;
 
 err_add_host:
+       if (pdata && pdata->exit)
+               pdata->exit(host);
+err_plat_init:
        iounmap(host->ioaddr);
 err_remap:
        release_mem_region(iomem->start, resource_size(iomem));
@@ -114,6 +128,7 @@ err:
 
 static int __devexit sdhci_pltfm_remove(struct platform_device *pdev)
 {
+       struct sdhci_pltfm_data *pdata = pdev->dev.platform_data;
        struct sdhci_host *host = platform_get_drvdata(pdev);
        struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        int dead;
@@ -125,6 +140,8 @@ static int __devexit sdhci_pltfm_remove(struct platform_device *pdev)
                dead = 1;
 
        sdhci_remove_host(host, dead);
+       if (pdata && pdata->exit)
+               pdata->exit(host);
        iounmap(host->ioaddr);
        release_mem_region(iomem->start, resource_size(iomem));
        sdhci_free_host(host);
@@ -165,4 +182,3 @@ MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver");
 MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:sdhci");
-
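Everything sdhci-pltfm consumes from platform data is visible in the hunks above: optional ops, quirks, and init/exit hooks. A hedged sketch of what a board file might now supply, with hypothetical names (SDHCI_QUIRK_BROKEN_ADMA is just one example quirk):

        #include <linux/sdhci-pltfm.h>

        /* Hypothetical board hook: runs after ioremap, before sdhci_add_host(). */
        static int foo_sdhci_init(struct sdhci_host *host)
        {
                /* e.g. enable board-level clocks or pad muxing */
                return 0;
        }

        static struct sdhci_pltfm_data foo_sdhci_pdata = {
                .quirks = SDHCI_QUIRK_BROKEN_ADMA,
                .init   = foo_sdhci_init,
                /* .ops and .exit are optional; the driver falls back to defaults */
        };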
index 2136794..af21792 100644 (file)
@@ -317,12 +317,7 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
        host->irq = irq;
 
        /* Setup quirks for the controller */
-
-       /* Currently with ADMA enabled we are getting some length
-        * interrupts that are not being dealt with, do disable
-        * ADMA until this is sorted out. */
-       host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
-       host->quirks |= SDHCI_QUIRK_32BIT_ADMA_SIZE;
+       host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
 
 #ifndef CONFIG_MMC_SDHCI_S3C_DMA
 
@@ -330,9 +325,6 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
         * support as well. */
        host->quirks |= SDHCI_QUIRK_BROKEN_DMA;
 
-       /* PIO currently has problems with multi-block IO */
-       host->quirks |= SDHCI_QUIRK_NO_MULTIBLOCK;
-
 #endif /* CONFIG_MMC_SDHCI_S3C_DMA */
 
        /* It seems we do not get an DATA transfer complete on non-busy
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
new file mode 100644 (file)
index 0000000..d70c54c
--- /dev/null
@@ -0,0 +1,298 @@
+/*
+ * drivers/mmc/host/sdhci-spear.c
+ *
+ * SDHCI platform device support for the ST SPEAr SoC family
+ *
+ * Copyright (C) 2010 ST Microelectronics
+ * Viresh Kumar<viresh.kumar@st.com>
+ *
+ * Inspired by sdhci-pltfm.c
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/sdhci-spear.h>
+#include <linux/io.h>
+#include "sdhci.h"
+
+struct spear_sdhci {
+       struct clk *clk;
+       struct sdhci_plat_data *data;
+};
+
+/* sdhci ops */
+static struct sdhci_ops sdhci_pltfm_ops = {
+       /* Nothing to do for now. */
+};
+
+/* gpio card detection interrupt handler */
+static irqreturn_t sdhci_gpio_irq(int irq, void *dev_id)
+{
+       struct platform_device *pdev = dev_id;
+       struct sdhci_host *host = platform_get_drvdata(pdev);
+       struct spear_sdhci *sdhci = dev_get_platdata(&pdev->dev);
+       unsigned long gpio_irq_type;
+       int val;
+
+       val = gpio_get_value(sdhci->data->card_int_gpio);
+
+       /* val == 1 -> card removed, val == 0 -> card inserted */
+       /* if the card was removed, arm the irq for a low level; else a high */
+       gpio_irq_type = val ? IRQF_TRIGGER_LOW : IRQF_TRIGGER_HIGH;
+       set_irq_type(irq, gpio_irq_type);
+
+       if (sdhci->data->card_power_gpio >= 0) {
+               if (!sdhci->data->power_always_enb) {
+                       /* if card inserted, give power, otherwise remove it */
+                       val = sdhci->data->power_active_high ? !val : val;
+                       gpio_set_value(sdhci->data->card_power_gpio, val);
+               }
+       }
+
+       /* inform sdhci driver about card insertion/removal */
+       tasklet_schedule(&host->card_tasklet);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit sdhci_probe(struct platform_device *pdev)
+{
+       struct sdhci_host *host;
+       struct resource *iomem;
+       struct spear_sdhci *sdhci;
+       int ret;
+
+       BUG_ON(pdev == NULL);
+
+       iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!iomem) {
+               ret = -ENOMEM;
+               dev_dbg(&pdev->dev, "memory resource not defined\n");
+               goto err;
+       }
+
+       if (!request_mem_region(iomem->start, resource_size(iomem),
+                               "spear-sdhci")) {
+               ret = -EBUSY;
+               dev_dbg(&pdev->dev, "cannot request region\n");
+               goto err;
+       }
+
+       sdhci = kzalloc(sizeof(*sdhci), GFP_KERNEL);
+       if (!sdhci) {
+               ret = -ENOMEM;
+               dev_dbg(&pdev->dev, "cannot allocate memory for sdhci\n");
+               goto err_kzalloc;
+       }
+
+       /* clk enable */
+       sdhci->clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(sdhci->clk)) {
+               ret = PTR_ERR(sdhci->clk);
+               dev_dbg(&pdev->dev, "Error getting clock\n");
+               goto err_clk_get;
+       }
+
+       ret = clk_enable(sdhci->clk);
+       if (ret) {
+               dev_dbg(&pdev->dev, "Error enabling clock\n");
+               goto err_clk_enb;
+       }
+
+       /* overwrite platform_data */
+       sdhci->data = dev_get_platdata(&pdev->dev);
+       pdev->dev.platform_data = sdhci;
+
+       if (pdev->dev.parent)
+               host = sdhci_alloc_host(pdev->dev.parent, 0);
+       else
+               host = sdhci_alloc_host(&pdev->dev, 0);
+
+       if (IS_ERR(host)) {
+               ret = PTR_ERR(host);
+               dev_dbg(&pdev->dev, "error allocating host\n");
+               goto err_alloc_host;
+       }
+
+       host->hw_name = "sdhci";
+       host->ops = &sdhci_pltfm_ops;
+       host->irq = platform_get_irq(pdev, 0);
+       host->quirks = SDHCI_QUIRK_BROKEN_ADMA;
+
+       host->ioaddr = ioremap(iomem->start, resource_size(iomem));
+       if (!host->ioaddr) {
+               ret = -ENOMEM;
+               dev_dbg(&pdev->dev, "failed to remap registers\n");
+               goto err_ioremap;
+       }
+
+       ret = sdhci_add_host(host);
+       if (ret) {
+               dev_dbg(&pdev->dev, "error adding host\n");
+               goto err_add_host;
+       }
+
+       platform_set_drvdata(pdev, host);
+
+       /*
+        * Using GPIOs for sdhci power control and card interrupt detection
+        * is optional: if sdhci->data is NULL the native sdhci lines are
+        * used, otherwise the GPIO lines are.
+        * If a GPIO is selected for power control, power should be cut
+        * after card removal and restored when the card insertion
+        * interrupt occurs.
+        */
+       if (!sdhci->data)
+               return 0;
+
+       if (sdhci->data->card_power_gpio >= 0) {
+               int val = 0;
+
+               ret = gpio_request(sdhci->data->card_power_gpio, "sdhci");
+               if (ret < 0) {
+                       dev_dbg(&pdev->dev, "gpio request fail: %d\n",
+                                       sdhci->data->card_power_gpio);
+                       goto err_pgpio_request;
+               }
+
+               if (sdhci->data->power_always_enb)
+                       val = sdhci->data->power_active_high;
+               else
+                       val = !sdhci->data->power_active_high;
+
+               ret = gpio_direction_output(sdhci->data->card_power_gpio, val);
+               if (ret) {
+                       dev_dbg(&pdev->dev, "gpio set direction fail: %d\n",
+                                       sdhci->data->card_power_gpio);
+                       goto err_pgpio_direction;
+               }
+
+               gpio_set_value(sdhci->data->card_power_gpio, 1);
+       }
+
+       if (sdhci->data->card_int_gpio >= 0) {
+               ret = gpio_request(sdhci->data->card_int_gpio, "sdhci");
+               if (ret < 0) {
+                       dev_dbg(&pdev->dev, "gpio request fail: %d\n",
+                                       sdhci->data->card_int_gpio);
+                       goto err_igpio_request;
+               }
+
+               ret = gpio_direction_input(sdhci->data->card_int_gpio);
+               if (ret) {
+                       dev_dbg(&pdev->dev, "gpio set direction fail: %d\n",
+                                       sdhci->data->card_int_gpio);
+                       goto err_igpio_direction;
+               }
+               ret = request_irq(gpio_to_irq(sdhci->data->card_int_gpio),
+                               sdhci_gpio_irq, IRQF_TRIGGER_LOW,
+                               mmc_hostname(host->mmc), pdev);
+               if (ret) {
+                       dev_dbg(&pdev->dev, "gpio request irq fail: %d\n",
+                                       sdhci->data->card_int_gpio);
+                       goto err_igpio_request_irq;
+               }
+
+       }
+
+       return 0;
+
+err_igpio_request_irq:
+err_igpio_direction:
+       if (sdhci->data->card_int_gpio >= 0)
+               gpio_free(sdhci->data->card_int_gpio);
+err_igpio_request:
+err_pgpio_direction:
+       if (sdhci->data->card_power_gpio >= 0)
+               gpio_free(sdhci->data->card_power_gpio);
+err_pgpio_request:
+       platform_set_drvdata(pdev, NULL);
+       sdhci_remove_host(host, 1);
+err_add_host:
+       iounmap(host->ioaddr);
+err_ioremap:
+       sdhci_free_host(host);
+err_alloc_host:
+       clk_disable(sdhci->clk);
+err_clk_enb:
+       clk_put(sdhci->clk);
+err_clk_get:
+       kfree(sdhci);
+err_kzalloc:
+       release_mem_region(iomem->start, resource_size(iomem));
+err:
+       dev_err(&pdev->dev, "spear-sdhci probe failed: %d\n", ret);
+       return ret;
+}
+
+static int __devexit sdhci_remove(struct platform_device *pdev)
+{
+       struct sdhci_host *host = platform_get_drvdata(pdev);
+       struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       struct spear_sdhci *sdhci = dev_get_platdata(&pdev->dev);
+       int dead;
+       u32 scratch;
+
+       if (sdhci->data) {
+               if (sdhci->data->card_int_gpio >= 0) {
+                       free_irq(gpio_to_irq(sdhci->data->card_int_gpio), pdev);
+                       gpio_free(sdhci->data->card_int_gpio);
+               }
+
+               if (sdhci->data->card_power_gpio >= 0)
+                       gpio_free(sdhci->data->card_power_gpio);
+       }
+
+       platform_set_drvdata(pdev, NULL);
+       dead = 0;
+       scratch = readl(host->ioaddr + SDHCI_INT_STATUS);
+       if (scratch == (u32)-1)
+               dead = 1;
+
+       sdhci_remove_host(host, dead);
+       iounmap(host->ioaddr);
+       sdhci_free_host(host);
+       clk_disable(sdhci->clk);
+       clk_put(sdhci->clk);
+       kfree(sdhci);
+       if (iomem)
+               release_mem_region(iomem->start, resource_size(iomem));
+
+       return 0;
+}
+
+static struct platform_driver sdhci_driver = {
+       .driver = {
+               .name   = "sdhci",
+               .owner  = THIS_MODULE,
+       },
+       .probe          = sdhci_probe,
+       .remove         = __devexit_p(sdhci_remove),
+};
+
+static int __init sdhci_init(void)
+{
+       return platform_driver_register(&sdhci_driver);
+}
+module_init(sdhci_init);
+
+static void __exit sdhci_exit(void)
+{
+       platform_driver_unregister(&sdhci_driver);
+}
+module_exit(sdhci_exit);
+
+MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_LICENSE("GPL v2");
index 9d4fdfa..c6d1bd8 100644 (file)
@@ -496,12 +496,22 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
                WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4);
        }
 
-       /*
-        * Add a terminating entry.
-        */
+       if (host->quirks & SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC) {
+               /*
+                * Mark the last descriptor as the terminating descriptor.
+                */
+               if (desc != host->adma_desc) {
+                       desc -= 8;
+                       desc[0] |= 0x2; /* end */
+               }
+       } else {
+               /*
+                * Add a terminating entry.
+                */
 
-       /* nop, end, valid */
-       sdhci_set_adma_desc(desc, 0, 0, 0x3);
+               /* nop, end, valid */
+               sdhci_set_adma_desc(desc, 0, 0, 0x3);
+       }
 
        /*
         * Resync align buffer as we might have changed it.
@@ -1587,7 +1597,7 @@ int sdhci_suspend_host(struct sdhci_host *host, pm_message_t state)
 
        sdhci_disable_card_detection(host);
 
-       ret = mmc_suspend_host(host->mmc, state);
+       ret = mmc_suspend_host(host->mmc);
        if (ret)
                return ret;
 
@@ -1744,7 +1754,8 @@ int sdhci_add_host(struct sdhci_host *host)
        host->max_clk =
                (caps & SDHCI_CLOCK_BASE_MASK) >> SDHCI_CLOCK_BASE_SHIFT;
        host->max_clk *= 1000000;
-       if (host->max_clk == 0) {
+       if (host->max_clk == 0 || host->quirks &
+                       SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN) {
                if (!host->ops->get_max_clock) {
                        printk(KERN_ERR
                               "%s: Hardware doesn't specify base clock "
index 842f46f..c846813 100644 (file)
 #define  SDHCI_INT_DATA_MASK   (SDHCI_INT_DATA_END | SDHCI_INT_DMA_END | \
                SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL | \
                SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \
-               SDHCI_INT_DATA_END_BIT | SDHCI_ADMA_ERROR)
+               SDHCI_INT_DATA_END_BIT | SDHCI_INT_ADMA_ERROR)
 #define SDHCI_INT_ALL_MASK     ((unsigned int)-1)
 
 #define SDHCI_ACMD12_ERR       0x3C
@@ -236,6 +236,10 @@ struct sdhci_host {
 #define SDHCI_QUIRK_DELAY_AFTER_POWER                  (1<<23)
 /* Controller uses SDCLK instead of TMCLK for data timeouts */
 #define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK            (1<<24)
+/* Controller reports wrong base clock capability */
+#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN              (1<<25)
+/* Controller cannot support End Attribute in NOP ADMA descriptor */
+#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC              (1<<26)
 
        int                     irq;            /* Device IRQ */
        void __iomem *          ioaddr;         /* Mapped address */
@@ -294,12 +298,12 @@ struct sdhci_host {
 
 struct sdhci_ops {
 #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
-       u32             (*readl)(struct sdhci_host *host, int reg);
-       u16             (*readw)(struct sdhci_host *host, int reg);
-       u8              (*readb)(struct sdhci_host *host, int reg);
-       void            (*writel)(struct sdhci_host *host, u32 val, int reg);
-       void            (*writew)(struct sdhci_host *host, u16 val, int reg);
-       void            (*writeb)(struct sdhci_host *host, u8 val, int reg);
+       u32             (*read_l)(struct sdhci_host *host, int reg);
+       u16             (*read_w)(struct sdhci_host *host, int reg);
+       u8              (*read_b)(struct sdhci_host *host, int reg);
+       void            (*write_l)(struct sdhci_host *host, u32 val, int reg);
+       void            (*write_w)(struct sdhci_host *host, u16 val, int reg);
+       void            (*write_b)(struct sdhci_host *host, u8 val, int reg);
 #endif
 
        void    (*set_clock)(struct sdhci_host *host, unsigned int clock);
@@ -314,48 +318,48 @@ struct sdhci_ops {
 
 static inline void sdhci_writel(struct sdhci_host *host, u32 val, int reg)
 {
-       if (unlikely(host->ops->writel))
-               host->ops->writel(host, val, reg);
+       if (unlikely(host->ops->write_l))
+               host->ops->write_l(host, val, reg);
        else
                writel(val, host->ioaddr + reg);
 }
 
 static inline void sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 {
-       if (unlikely(host->ops->writew))
-               host->ops->writew(host, val, reg);
+       if (unlikely(host->ops->write_w))
+               host->ops->write_w(host, val, reg);
        else
                writew(val, host->ioaddr + reg);
 }
 
 static inline void sdhci_writeb(struct sdhci_host *host, u8 val, int reg)
 {
-       if (unlikely(host->ops->writeb))
-               host->ops->writeb(host, val, reg);
+       if (unlikely(host->ops->write_b))
+               host->ops->write_b(host, val, reg);
        else
                writeb(val, host->ioaddr + reg);
 }
 
 static inline u32 sdhci_readl(struct sdhci_host *host, int reg)
 {
-       if (unlikely(host->ops->readl))
-               return host->ops->readl(host, reg);
+       if (unlikely(host->ops->read_l))
+               return host->ops->read_l(host, reg);
        else
                return readl(host->ioaddr + reg);
 }
 
 static inline u16 sdhci_readw(struct sdhci_host *host, int reg)
 {
-       if (unlikely(host->ops->readw))
-               return host->ops->readw(host, reg);
+       if (unlikely(host->ops->read_w))
+               return host->ops->read_w(host, reg);
        else
                return readw(host->ioaddr + reg);
 }
 
 static inline u8 sdhci_readb(struct sdhci_host *host, int reg)
 {
-       if (unlikely(host->ops->readb))
-               return host->ops->readb(host, reg);
+       if (unlikely(host->ops->read_b))
+               return host->ops->read_b(host, reg);
        else
                return readb(host->ioaddr + reg);
 }
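
The accessor hooks are renamed (readw becomes read_w, and so on), which keeps them from clashing on architectures where readw()/writew() are preprocessor macros. As a sketch of what an override can look like, here is a hypothetical controller that only tolerates 32-bit bus cycles and emulates 16-bit reads through readl() (names and the quirk itself are illustrative, not from this diff):

	static u16 my_read_w(struct sdhci_host *host, int reg)
	{
		/* read the containing 32-bit word, then shift out the
		 * requested 16-bit half */
		u32 word = readl(host->ioaddr + (reg & ~3));

		return word >> ((reg & 3) * 8);
	}

	static struct sdhci_ops my_ops = {
		.read_w	= my_read_w,
	};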
index cb41e9c..e7507af 100644 (file)
@@ -519,7 +519,7 @@ static int sdricoh_pcmcia_suspend(struct pcmcia_device *link)
 {
        struct mmc_host *mmc = link->priv;
        dev_dbg(&link->dev, "suspend\n");
-       mmc_suspend_host(mmc, PMSG_SUSPEND);
+       mmc_suspend_host(mmc);
        return 0;
 }
 
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
new file mode 100644 (file)
index 0000000..eb97830
--- /dev/null
@@ -0,0 +1,965 @@
+/*
+ * MMCIF eMMC driver.
+ *
+ * Copyright (C) 2010 Renesas Solutions Corp.
+ * Yusuke Goda <yusuke.goda.sx@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ *
+ * TODO
+ *  1. DMA
+ *  2. Power management
+ *  3. Handle MMC errors better
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sdio.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/mmc/sh_mmcif.h>
+
+#define DRIVER_NAME    "sh_mmcif"
+#define DRIVER_VERSION "2010-04-28"
+
+#define MMCIF_CE_CMD_SET       0x00000000
+#define MMCIF_CE_ARG           0x00000008
+#define MMCIF_CE_ARG_CMD12     0x0000000C
+#define MMCIF_CE_CMD_CTRL      0x00000010
+#define MMCIF_CE_BLOCK_SET     0x00000014
+#define MMCIF_CE_CLK_CTRL      0x00000018
+#define MMCIF_CE_BUF_ACC       0x0000001C
+#define MMCIF_CE_RESP3         0x00000020
+#define MMCIF_CE_RESP2         0x00000024
+#define MMCIF_CE_RESP1         0x00000028
+#define MMCIF_CE_RESP0         0x0000002C
+#define MMCIF_CE_RESP_CMD12    0x00000030
+#define MMCIF_CE_DATA          0x00000034
+#define MMCIF_CE_INT           0x00000040
+#define MMCIF_CE_INT_MASK      0x00000044
+#define MMCIF_CE_HOST_STS1     0x00000048
+#define MMCIF_CE_HOST_STS2     0x0000004C
+#define MMCIF_CE_VERSION       0x0000007C
+
+/* CE_CMD_SET */
+#define CMD_MASK               0x3f000000
+#define CMD_SET_RTYP_NO                ((0 << 23) | (0 << 22))
+#define CMD_SET_RTYP_6B                ((0 << 23) | (1 << 22)) /* R1/R1b/R3/R4/R5 */
+#define CMD_SET_RTYP_17B       ((1 << 23) | (0 << 22)) /* R2 */
+#define CMD_SET_RBSY           (1 << 21) /* R1b */
+#define CMD_SET_CCSEN          (1 << 20)
+#define CMD_SET_WDAT           (1 << 19) /* 1: on data, 0: no data */
+#define CMD_SET_DWEN           (1 << 18) /* 1: write, 0: read */
+#define CMD_SET_CMLTE          (1 << 17) /* 1: multi block trans, 0: single */
+#define CMD_SET_CMD12EN                (1 << 16) /* 1: CMD12 auto issue */
+#define CMD_SET_RIDXC_INDEX    ((0 << 15) | (0 << 14)) /* index check */
+#define CMD_SET_RIDXC_BITS     ((0 << 15) | (1 << 14)) /* check bits check */
+#define CMD_SET_RIDXC_NO       ((1 << 15) | (0 << 14)) /* no check */
+#define CMD_SET_CRC7C          ((0 << 13) | (0 << 12)) /* CRC7 check */
+#define CMD_SET_CRC7C_BITS     ((0 << 13) | (1 << 12)) /* check bits check */
+#define CMD_SET_CRC7C_INTERNAL ((1 << 13) | (0 << 12)) /* internal CRC7 check */
+#define CMD_SET_CRC16C         (1 << 10) /* 0: CRC16 check */
+#define CMD_SET_CRCSTE         (1 << 8) /* 1: do not receive CRC status */
+#define CMD_SET_TBIT           (1 << 7) /* 1: transmission bit "Low" */
+#define CMD_SET_OPDM           (1 << 6) /* 1: open/drain */
+#define CMD_SET_CCSH           (1 << 5)
+#define CMD_SET_DATW_1         ((0 << 1) | (0 << 0)) /* 1bit */
+#define CMD_SET_DATW_4         ((0 << 1) | (1 << 0)) /* 4bit */
+#define CMD_SET_DATW_8         ((1 << 1) | (0 << 0)) /* 8bit */
+
+/* CE_CMD_CTRL */
+#define CMD_CTRL_BREAK         (1 << 0)
+
+/* CE_BLOCK_SET */
+#define BLOCK_SIZE_MASK                0x0000ffff
+
+/* CE_CLK_CTRL */
+#define CLK_ENABLE             (1 << 24) /* 1: output mmc clock */
+#define CLK_CLEAR              ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
+#define CLK_SUP_PCLK           ((1 << 19) | (1 << 18) | (1 << 17) | (1 << 16))
+#define SRSPTO_256             ((1 << 13) | (0 << 12)) /* resp timeout */
+#define SRBSYTO_29             ((1 << 11) | (1 << 10) |        \
+                                (1 << 9) | (1 << 8)) /* resp busy timeout */
+#define SRWDTO_29              ((1 << 7) | (1 << 6) |          \
+                                (1 << 5) | (1 << 4)) /* read/write timeout */
+#define SCCSTO_29              ((1 << 3) | (1 << 2) |          \
+                                (1 << 1) | (1 << 0)) /* ccs timeout */
+
+/* CE_BUF_ACC */
+#define BUF_ACC_DMAWEN         (1 << 25)
+#define BUF_ACC_DMAREN         (1 << 24)
+#define BUF_ACC_BUSW_32                (0 << 17)
+#define BUF_ACC_BUSW_16                (1 << 17)
+#define BUF_ACC_ATYP           (1 << 16)
+
+/* CE_INT */
+#define INT_CCSDE              (1 << 29)
+#define INT_CMD12DRE           (1 << 26)
+#define INT_CMD12RBE           (1 << 25)
+#define INT_CMD12CRE           (1 << 24)
+#define INT_DTRANE             (1 << 23)
+#define INT_BUFRE              (1 << 22)
+#define INT_BUFWEN             (1 << 21)
+#define INT_BUFREN             (1 << 20)
+#define INT_CCSRCV             (1 << 19)
+#define INT_RBSYE              (1 << 17)
+#define INT_CRSPE              (1 << 16)
+#define INT_CMDVIO             (1 << 15)
+#define INT_BUFVIO             (1 << 14)
+#define INT_WDATERR            (1 << 11)
+#define INT_RDATERR            (1 << 10)
+#define INT_RIDXERR            (1 << 9)
+#define INT_RSPERR             (1 << 8)
+#define INT_CCSTO              (1 << 5)
+#define INT_CRCSTO             (1 << 4)
+#define INT_WDATTO             (1 << 3)
+#define INT_RDATTO             (1 << 2)
+#define INT_RBSYTO             (1 << 1)
+#define INT_RSPTO              (1 << 0)
+#define INT_ERR_STS            (INT_CMDVIO | INT_BUFVIO | INT_WDATERR |  \
+                                INT_RDATERR | INT_RIDXERR | INT_RSPERR | \
+                                INT_CCSTO | INT_CRCSTO | INT_WDATTO |    \
+                                INT_RDATTO | INT_RBSYTO | INT_RSPTO)
+
+/* CE_INT_MASK */
+#define MASK_ALL               0x00000000
+#define MASK_MCCSDE            (1 << 29)
+#define MASK_MCMD12DRE         (1 << 26)
+#define MASK_MCMD12RBE         (1 << 25)
+#define MASK_MCMD12CRE         (1 << 24)
+#define MASK_MDTRANE           (1 << 23)
+#define MASK_MBUFRE            (1 << 22)
+#define MASK_MBUFWEN           (1 << 21)
+#define MASK_MBUFREN           (1 << 20)
+#define MASK_MCCSRCV           (1 << 19)
+#define MASK_MRBSYE            (1 << 17)
+#define MASK_MCRSPE            (1 << 16)
+#define MASK_MCMDVIO           (1 << 15)
+#define MASK_MBUFVIO           (1 << 14)
+#define MASK_MWDATERR          (1 << 11)
+#define MASK_MRDATERR          (1 << 10)
+#define MASK_MRIDXERR          (1 << 9)
+#define MASK_MRSPERR           (1 << 8)
+#define MASK_MCCSTO            (1 << 5)
+#define MASK_MCRCSTO           (1 << 4)
+#define MASK_MWDATTO           (1 << 3)
+#define MASK_MRDATTO           (1 << 2)
+#define MASK_MRBSYTO           (1 << 1)
+#define MASK_MRSPTO            (1 << 0)
+
+/* CE_HOST_STS1 */
+#define STS1_CMDSEQ            (1 << 31)
+
+/* CE_HOST_STS2 */
+#define STS2_CRCSTE            (1 << 31)
+#define STS2_CRC16E            (1 << 30)
+#define STS2_AC12CRCE          (1 << 29)
+#define STS2_RSPCRC7E          (1 << 28)
+#define STS2_CRCSTEBE          (1 << 27)
+#define STS2_RDATEBE           (1 << 26)
+#define STS2_AC12REBE          (1 << 25)
+#define STS2_RSPEBE            (1 << 24)
+#define STS2_AC12IDXE          (1 << 23)
+#define STS2_RSPIDXE           (1 << 22)
+#define STS2_CCSTO             (1 << 15)
+#define STS2_RDATTO            (1 << 14)
+#define STS2_DATBSYTO          (1 << 13)
+#define STS2_CRCSTTO           (1 << 12)
+#define STS2_AC12BSYTO         (1 << 11)
+#define STS2_RSPBSYTO          (1 << 10)
+#define STS2_AC12RSPTO         (1 << 9)
+#define STS2_RSPTO             (1 << 8)
+#define STS2_CRC_ERR           (STS2_CRCSTE | STS2_CRC16E |            \
+                                STS2_AC12CRCE | STS2_RSPCRC7E | STS2_CRCSTEBE)
+#define STS2_TIMEOUT_ERR       (STS2_CCSTO | STS2_RDATTO |             \
+                                STS2_DATBSYTO | STS2_CRCSTTO |         \
+                                STS2_AC12BSYTO | STS2_RSPBSYTO |       \
+                                STS2_AC12RSPTO | STS2_RSPTO)
+
+/* CE_VERSION */
+#define SOFT_RST_ON            (1 << 31)
+#define SOFT_RST_OFF           (0 << 31)
+
+#define CLKDEV_EMMC_DATA       52000000 /* 52MHz */
+#define CLKDEV_MMC_DATA                20000000 /* 20MHz */
+#define CLKDEV_INIT            400000   /* 400 KHz */
+
+struct sh_mmcif_host {
+       struct mmc_host *mmc;
+       struct mmc_data *data;
+       struct mmc_command *cmd;
+       struct platform_device *pd;
+       struct clk *hclk;
+       unsigned int clk;
+       int bus_width;
+       u16 wait_int;
+       u16 sd_error;
+       long timeout;
+       void __iomem *addr;
+       wait_queue_head_t intr_wait;
+};
+
+static inline u32 sh_mmcif_readl(struct sh_mmcif_host *host, unsigned int reg)
+{
+       return readl(host->addr + reg);
+}
+
+static inline void sh_mmcif_writel(struct sh_mmcif_host *host,
+                                       unsigned int reg, u32 val)
+{
+       writel(val, host->addr + reg);
+}
+
+static inline void sh_mmcif_bitset(struct sh_mmcif_host *host,
+                                       unsigned int reg, u32 val)
+{
+       writel(val | sh_mmcif_readl(host, reg), host->addr + reg);
+}
+
+static inline void sh_mmcif_bitclr(struct sh_mmcif_host *host,
+                                       unsigned int reg, u32 val)
+{
+       writel(~val & sh_mmcif_readl(host, reg), host->addr + reg);
+}
+
+
+static void sh_mmcif_clock_control(struct sh_mmcif_host *host, unsigned int clk)
+{
+       struct sh_mmcif_plat_data *p = host->pd->dev.platform_data;
+
+       sh_mmcif_bitclr(host, MMCIF_CE_CLK_CTRL, CLK_ENABLE);
+       sh_mmcif_bitclr(host, MMCIF_CE_CLK_CTRL, CLK_CLEAR);
+
+       if (!clk)
+               return;
+       if (p->sup_pclk && clk == host->clk)
+               sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_SUP_PCLK);
+       else
+               sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_CLEAR &
+                       (ilog2(__rounddown_pow_of_two(host->clk / clk)) << 16));
+
+       sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_ENABLE);
+}
+
+static void sh_mmcif_sync_reset(struct sh_mmcif_host *host)
+{
+       u32 tmp;
+
+       tmp = 0x010f0000 & sh_mmcif_readl(host, MMCIF_CE_CLK_CTRL);
+
+       sh_mmcif_writel(host, MMCIF_CE_VERSION, SOFT_RST_ON);
+       sh_mmcif_writel(host, MMCIF_CE_VERSION, SOFT_RST_OFF);
+       sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, tmp |
+               SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | SCCSTO_29);
+       /* byte swap on */
+       sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP);
+}
+
+static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
+{
+       u32 state1, state2;
+       int ret, timeout = 10000000;
+
+       host->sd_error = 0;
+       host->wait_int = 0;
+
+       state1 = sh_mmcif_readl(host, MMCIF_CE_HOST_STS1);
+       state2 = sh_mmcif_readl(host, MMCIF_CE_HOST_STS2);
+       pr_debug("%s: ERR HOST_STS1 = %08x\n", \
+                       DRIVER_NAME, sh_mmcif_readl(host, MMCIF_CE_HOST_STS1));
+       pr_debug("%s: ERR HOST_STS2 = %08x\n", \
+                       DRIVER_NAME, sh_mmcif_readl(host, MMCIF_CE_HOST_STS2));
+
+       if (state1 & STS1_CMDSEQ) {
+               sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, CMD_CTRL_BREAK);
+               sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, ~CMD_CTRL_BREAK);
+               while (1) {
+                       timeout--;
+                       if (timeout < 0) {
+                               pr_err(DRIVER_NAME": Forceed end of " \
+                                       "command sequence timeout err\n");
+                               return -EIO;
+                       }
+                       if (!(sh_mmcif_readl(host, MMCIF_CE_HOST_STS1)
+                                                               & STS1_CMDSEQ))
+                               break;
+                       mdelay(1);
+               }
+               sh_mmcif_sync_reset(host);
+               pr_debug(DRIVER_NAME": Forced end of command sequence\n");
+               return -EIO;
+       }
+
+       if (state2 & STS2_CRC_ERR) {
+               pr_debug(DRIVER_NAME": Happened CRC error\n");
+               ret = -EIO;
+       } else if (state2 & STS2_TIMEOUT_ERR) {
+               pr_debug(DRIVER_NAME": Happened Timeout error\n");
+               ret = -ETIMEDOUT;
+       } else {
+               pr_debug(DRIVER_NAME": Happened End/Index error\n");
+               ret = -EIO;
+       }
+       return ret;
+}
+
+static int sh_mmcif_single_read(struct sh_mmcif_host *host,
+                                       struct mmc_request *mrq)
+{
+       struct mmc_data *data = mrq->data;
+       long time;
+       u32 blocksize, i, *p = sg_virt(data->sg);
+
+       host->wait_int = 0;
+
+       /* buf read enable */
+       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
+       time = wait_event_interruptible_timeout(host->intr_wait,
+                       host->wait_int == 1 ||
+                       host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+               return sh_mmcif_error_manage(host);
+
+       host->wait_int = 0;
+       blocksize = (BLOCK_SIZE_MASK &
+                       sh_mmcif_readl(host, MMCIF_CE_BLOCK_SET)) + 3;
+       for (i = 0; i < blocksize / 4; i++)
+               *p++ = sh_mmcif_readl(host, MMCIF_CE_DATA);
+
+       /* buffer read end */
+       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFRE);
+       time = wait_event_interruptible_timeout(host->intr_wait,
+                       host->wait_int == 1 ||
+                       host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+               return sh_mmcif_error_manage(host);
+
+       host->wait_int = 0;
+       return 0;
+}
+
+static int sh_mmcif_multi_read(struct sh_mmcif_host *host,
+                                       struct mmc_request *mrq)
+{
+       struct mmc_data *data = mrq->data;
+       long time;
+       u32 blocksize, i, j, sec, *p;
+
+       blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host, MMCIF_CE_BLOCK_SET);
+       for (j = 0; j < data->sg_len; j++) {
+               p = sg_virt(data->sg);
+               host->wait_int = 0;
+               for (sec = 0; sec < data->sg->length / blocksize; sec++) {
+                       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
+                       /* buf read enable */
+                       time = wait_event_interruptible_timeout(host->intr_wait,
+                               host->wait_int == 1 ||
+                               host->sd_error == 1, host->timeout);
+
+                       if (host->wait_int != 1 &&
+                           (time == 0 || host->sd_error != 0))
+                               return sh_mmcif_error_manage(host);
+
+                       host->wait_int = 0;
+                       for (i = 0; i < blocksize / 4; i++)
+                               *p++ = sh_mmcif_readl(host, MMCIF_CE_DATA);
+               }
+               if (j < data->sg_len - 1)
+                       data->sg++;
+       }
+       return 0;
+}
+
+static int sh_mmcif_single_write(struct sh_mmcif_host *host,
+                                       struct mmc_request *mrq)
+{
+       struct mmc_data *data = mrq->data;
+       long time;
+       u32 blocksize, i, *p = sg_virt(data->sg);
+
+       host->wait_int = 0;
+       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+
+       /* buf write enable */
+       time = wait_event_interruptible_timeout(host->intr_wait,
+                       host->wait_int == 1 ||
+                       host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+               return sh_mmcif_error_manage(host);
+
+       host->wait_int = 0;
+       blocksize = (BLOCK_SIZE_MASK &
+                       sh_mmcif_readl(host, MMCIF_CE_BLOCK_SET)) + 3;
+       for (i = 0; i < blocksize / 4; i++)
+               sh_mmcif_writel(host, MMCIF_CE_DATA, *p++);
+
+       /* buffer write end */
+       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MDTRANE);
+
+       time = wait_event_interruptible_timeout(host->intr_wait,
+                       host->wait_int == 1 ||
+                       host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && (time == 0 || host->sd_error != 0))
+               return sh_mmcif_error_manage(host);
+
+       host->wait_int = 0;
+       return 0;
+}
+
+static int sh_mmcif_multi_write(struct sh_mmcif_host *host,
+                                               struct mmc_request *mrq)
+{
+       struct mmc_data *data = mrq->data;
+       long time;
+       u32 i, sec, j, blocksize, *p;
+
+       blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host, MMCIF_CE_BLOCK_SET);
+
+       for (j = 0; j < data->sg_len; j++) {
+               p = sg_virt(data->sg);
+               host->wait_int = 0;
+               for (sec = 0; sec < data->sg->length / blocksize; sec++) {
+                       sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+                       /* buf write enable */
+                       time = wait_event_interruptible_timeout(host->intr_wait,
+                               host->wait_int == 1 ||
+                               host->sd_error == 1, host->timeout);
+
+                       if (host->wait_int != 1 &&
+                           (time == 0 || host->sd_error != 0))
+                               return sh_mmcif_error_manage(host);
+
+                       host->wait_int = 0;
+                       for (i = 0; i < blocksize / 4; i++)
+                               sh_mmcif_writel(host, MMCIF_CE_DATA, *p++);
+               }
+               if (j < data->sg_len - 1)
+                       data->sg++;
+       }
+       return 0;
+}
+
+static void sh_mmcif_get_response(struct sh_mmcif_host *host,
+                                               struct mmc_command *cmd)
+{
+       if (cmd->flags & MMC_RSP_136) {
+               cmd->resp[0] = sh_mmcif_readl(host, MMCIF_CE_RESP3);
+               cmd->resp[1] = sh_mmcif_readl(host, MMCIF_CE_RESP2);
+               cmd->resp[2] = sh_mmcif_readl(host, MMCIF_CE_RESP1);
+               cmd->resp[3] = sh_mmcif_readl(host, MMCIF_CE_RESP0);
+       } else
+               cmd->resp[0] = sh_mmcif_readl(host, MMCIF_CE_RESP0);
+}
+
+static void sh_mmcif_get_cmd12response(struct sh_mmcif_host *host,
+                                               struct mmc_command *cmd)
+{
+       cmd->resp[0] = sh_mmcif_readl(host, MMCIF_CE_RESP_CMD12);
+}
+
+static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
+               struct mmc_request *mrq, struct mmc_command *cmd, u32 opc)
+{
+       u32 tmp = 0;
+
+       /* Response Type check */
+       switch (mmc_resp_type(cmd)) {
+       case MMC_RSP_NONE:
+               tmp |= CMD_SET_RTYP_NO;
+               break;
+       case MMC_RSP_R1:
+       case MMC_RSP_R1B:
+       case MMC_RSP_R3:
+               tmp |= CMD_SET_RTYP_6B;
+               break;
+       case MMC_RSP_R2:
+               tmp |= CMD_SET_RTYP_17B;
+               break;
+       default:
+               pr_err(DRIVER_NAME": Not support type response.\n");
+               break;
+       }
+       switch (opc) {
+       /* RBSY */
+       case MMC_SWITCH:
+       case MMC_STOP_TRANSMISSION:
+       case MMC_SET_WRITE_PROT:
+       case MMC_CLR_WRITE_PROT:
+       case MMC_ERASE:
+       case MMC_GEN_CMD:
+               tmp |= CMD_SET_RBSY;
+               break;
+       }
+       /* WDAT / DATW */
+       if (host->data) {
+               tmp |= CMD_SET_WDAT;
+               switch (host->bus_width) {
+               case MMC_BUS_WIDTH_1:
+                       tmp |= CMD_SET_DATW_1;
+                       break;
+               case MMC_BUS_WIDTH_4:
+                       tmp |= CMD_SET_DATW_4;
+                       break;
+               case MMC_BUS_WIDTH_8:
+                       tmp |= CMD_SET_DATW_8;
+                       break;
+               default:
+                       pr_err(DRIVER_NAME": Not support bus width.\n");
+                       break;
+               }
+       }
+       /* DWEN */
+       if (opc == MMC_WRITE_BLOCK || opc == MMC_WRITE_MULTIPLE_BLOCK)
+               tmp |= CMD_SET_DWEN;
+       /* CMLTE/CMD12EN */
+       if (opc == MMC_READ_MULTIPLE_BLOCK || opc == MMC_WRITE_MULTIPLE_BLOCK) {
+               tmp |= CMD_SET_CMLTE | CMD_SET_CMD12EN;
+               sh_mmcif_bitset(host, MMCIF_CE_BLOCK_SET,
+                                       mrq->data->blocks << 16);
+       }
+       /* RIDXC[1:0] check bits */
+       if (opc == MMC_SEND_OP_COND || opc == MMC_ALL_SEND_CID ||
+           opc == MMC_SEND_CSD || opc == MMC_SEND_CID)
+               tmp |= CMD_SET_RIDXC_BITS;
+       /* RCRC7C[1:0] check bits */
+       if (opc == MMC_SEND_OP_COND)
+               tmp |= CMD_SET_CRC7C_BITS;
+       /* RCRC7C[1:0] internal CRC7 */
+       if (opc == MMC_ALL_SEND_CID ||
+               opc == MMC_SEND_CSD || opc == MMC_SEND_CID)
+               tmp |= CMD_SET_CRC7C_INTERNAL;
+
+       return opc = ((opc << 24) | tmp);
+}
+
+static u32 sh_mmcif_data_trans(struct sh_mmcif_host *host,
+                               struct mmc_request *mrq, u32 opc)
+{
+       u32 ret;
+
+       switch (opc) {
+       case MMC_READ_MULTIPLE_BLOCK:
+               ret = sh_mmcif_multi_read(host, mrq);
+               break;
+       case MMC_WRITE_MULTIPLE_BLOCK:
+               ret = sh_mmcif_multi_write(host, mrq);
+               break;
+       case MMC_WRITE_BLOCK:
+               ret = sh_mmcif_single_write(host, mrq);
+               break;
+       case MMC_READ_SINGLE_BLOCK:
+       case MMC_SEND_EXT_CSD:
+               ret = sh_mmcif_single_read(host, mrq);
+               break;
+       default:
+               pr_err(DRIVER_NAME": NOT SUPPORT CMD = d'%08d\n", opc);
+               ret = -EINVAL;
+               break;
+       }
+       return ret;
+}
+
+static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
+                       struct mmc_request *mrq, struct mmc_command *cmd)
+{
+       long time;
+       int ret = 0, mask = 0;
+       u32 opc = cmd->opcode;
+
+       host->cmd = cmd;
+
+       switch (opc) {
+       /* response busy check */
+       case MMC_SWITCH:
+       case MMC_STOP_TRANSMISSION:
+       case MMC_SET_WRITE_PROT:
+       case MMC_CLR_WRITE_PROT:
+       case MMC_ERASE:
+       case MMC_GEN_CMD:
+               mask = MASK_MRBSYE;
+               break;
+       default:
+               mask = MASK_MCRSPE;
+               break;
+       }
+       mask |= MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR |
+               MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR |
+               MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO |
+               MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO;
+
+       if (host->data) {
+               sh_mmcif_writel(host, MMCIF_CE_BLOCK_SET, 0);
+               sh_mmcif_writel(host, MMCIF_CE_BLOCK_SET, mrq->data->blksz);
+       }
+       opc = sh_mmcif_set_cmd(host, mrq, cmd, opc);
+
+       sh_mmcif_writel(host, MMCIF_CE_INT, 0xD80430C0);
+       sh_mmcif_writel(host, MMCIF_CE_INT_MASK, mask);
+       /* set arg */
+       sh_mmcif_writel(host, MMCIF_CE_ARG, cmd->arg);
+       host->wait_int = 0;
+       /* set cmd */
+       sh_mmcif_writel(host, MMCIF_CE_CMD_SET, opc);
+
+       time = wait_event_interruptible_timeout(host->intr_wait,
+               host->wait_int == 1 || host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && time == 0) {
+               cmd->error = sh_mmcif_error_manage(host);
+               return;
+       }
+       if (host->sd_error) {
+               switch (cmd->opcode) {
+               case MMC_ALL_SEND_CID:
+               case MMC_SELECT_CARD:
+               case MMC_APP_CMD:
+                       cmd->error = -ETIMEDOUT;
+                       break;
+               default:
+                       pr_debug("%s: Cmd(d'%d) err\n",
+                                       DRIVER_NAME, cmd->opcode);
+                       cmd->error = sh_mmcif_error_manage(host);
+                       break;
+               }
+               host->sd_error = 0;
+               host->wait_int = 0;
+               return;
+       }
+       if (!(cmd->flags & MMC_RSP_PRESENT)) {
+               cmd->error = ret;
+               host->wait_int = 0;
+               return;
+       }
+       if (host->wait_int == 1) {
+               sh_mmcif_get_response(host, cmd);
+               host->wait_int = 0;
+       }
+       if (host->data) {
+               ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
+               if (ret < 0)
+                       mrq->data->bytes_xfered = 0;
+               else
+                       mrq->data->bytes_xfered =
+                               mrq->data->blocks * mrq->data->blksz;
+       }
+       cmd->error = ret;
+}
+
+static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host,
+               struct mmc_request *mrq, struct mmc_command *cmd)
+{
+       long time;
+
+       if (mrq->cmd->opcode == MMC_READ_MULTIPLE_BLOCK)
+               sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12DRE);
+       else if (mrq->cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+               sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE);
+       else {
+               pr_err(DRIVER_NAME": not support stop cmd\n");
+               cmd->error = sh_mmcif_error_manage(host);
+               return;
+       }
+
+       time = wait_event_interruptible_timeout(host->intr_wait,
+                       host->wait_int == 1 ||
+                       host->sd_error == 1, host->timeout);
+       if (host->wait_int != 1 && (time == 0 || host->sd_error != 0)) {
+               cmd->error = sh_mmcif_error_manage(host);
+               return;
+       }
+       sh_mmcif_get_cmd12response(host, cmd);
+       host->wait_int = 0;
+       cmd->error = 0;
+}
+
+static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+       struct sh_mmcif_host *host = mmc_priv(mmc);
+
+       switch (mrq->cmd->opcode) {
+       /* MMCIF does not support SD/SDIO commands */
+       case SD_IO_SEND_OP_COND:
+       case MMC_APP_CMD:
+               mrq->cmd->error = -ETIMEDOUT;
+               mmc_request_done(mmc, mrq);
+               return;
+       case MMC_SEND_EXT_CSD: /* = SD_SEND_IF_COND (8) */
+               if (!mrq->data) {
+                       /* SEND_IF_COND command (not supported) */
+                       mrq->cmd->error = -ETIMEDOUT;
+                       mmc_request_done(mmc, mrq);
+                       return;
+               }
+               break;
+       default:
+               break;
+       }
+       host->data = mrq->data;
+       sh_mmcif_start_cmd(host, mrq, mrq->cmd);
+       host->data = NULL;
+
+       if (mrq->cmd->error != 0) {
+               mmc_request_done(mmc, mrq);
+               return;
+       }
+       if (mrq->stop)
+               sh_mmcif_stop_cmd(host, mrq, mrq->stop);
+       mmc_request_done(mmc, mrq);
+}
+
+static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+       struct sh_mmcif_host *host = mmc_priv(mmc);
+       struct sh_mmcif_plat_data *p = host->pd->dev.platform_data;
+
+       if (ios->power_mode == MMC_POWER_OFF) {
+               /* clock stop */
+               sh_mmcif_clock_control(host, 0);
+               if (p->down_pwr)
+                       p->down_pwr(host->pd);
+               return;
+       } else if (ios->power_mode == MMC_POWER_UP) {
+               if (p->set_pwr)
+                       p->set_pwr(host->pd, ios->power_mode);
+       }
+
+       if (ios->clock)
+               sh_mmcif_clock_control(host, ios->clock);
+
+       host->bus_width = ios->bus_width;
+}
+
+static struct mmc_host_ops sh_mmcif_ops = {
+       .request        = sh_mmcif_request,
+       .set_ios        = sh_mmcif_set_ios,
+};
+
+static void sh_mmcif_detect(struct mmc_host *mmc)
+{
+       mmc_detect_change(mmc, 0);
+}
+
+static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
+{
+       struct sh_mmcif_host *host = dev_id;
+       u32 state = 0;
+       int err = 0;
+
+       state = sh_mmcif_readl(host, MMCIF_CE_INT);
+
+       if (state & INT_RBSYE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~(INT_RBSYE | INT_CRSPE));
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MRBSYE);
+       } else if (state & INT_CRSPE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~INT_CRSPE);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MCRSPE);
+       } else if (state & INT_BUFREN) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~INT_BUFREN);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
+       } else if (state & INT_BUFWEN) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~INT_BUFWEN);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+       } else if (state & INT_CMD12DRE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT,
+                       ~(INT_CMD12DRE | INT_CMD12RBE |
+                         INT_CMD12CRE | INT_BUFRE));
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MCMD12DRE);
+       } else if (state & INT_BUFRE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~INT_BUFRE);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MBUFRE);
+       } else if (state & INT_DTRANE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~INT_DTRANE);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MDTRANE);
+       } else if (state & INT_CMD12RBE) {
+               sh_mmcif_writel(host, MMCIF_CE_INT,
+                               ~(INT_CMD12RBE | INT_CMD12CRE));
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE);
+       } else if (state & INT_ERR_STS) {
+               /* err interrupts */
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~state);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
+               err = 1;
+       } else {
+               pr_debug("%s: Not support int\n", DRIVER_NAME);
+               sh_mmcif_writel(host, MMCIF_CE_INT, ~state);
+               sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
+               err = 1;
+       }
+       if (err) {
+               host->sd_error = 1;
+               pr_debug("%s: int err state = %08x\n", DRIVER_NAME, state);
+       }
+       host->wait_int = 1;
+       wake_up(&host->intr_wait);
+
+       return IRQ_HANDLED;
+}
+
+static int __devinit sh_mmcif_probe(struct platform_device *pdev)
+{
+       int ret = 0, irq[2];
+       struct mmc_host *mmc;
+       struct sh_mmcif_host *host = NULL;
+       struct sh_mmcif_plat_data *pd = NULL;
+       struct resource *res;
+       void __iomem *reg;
+       char clk_name[8];
+
+       irq[0] = platform_get_irq(pdev, 0);
+       irq[1] = platform_get_irq(pdev, 1);
+       if (irq[0] < 0 || irq[1] < 0) {
+               pr_err(DRIVER_NAME": Get irq error\n");
+               return -ENXIO;
+       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "platform_get_resource error.\n");
+               return -ENXIO;
+       }
+       reg = ioremap(res->start, resource_size(res));
+       if (!reg) {
+               dev_err(&pdev->dev, "ioremap error.\n");
+               return -ENOMEM;
+       }
+       pd = (struct sh_mmcif_plat_data *)(pdev->dev.platform_data);
+       if (!pd) {
+               dev_err(&pdev->dev, "sh_mmcif plat data error.\n");
+               ret = -ENXIO;
+               goto clean_up;
+       }
+       mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev);
+       if (!mmc) {
+               ret = -ENOMEM;
+               goto clean_up;
+       }
+       host            = mmc_priv(mmc);
+       host->mmc       = mmc;
+       host->addr      = reg;
+       host->timeout   = 1000;
+
+       snprintf(clk_name, sizeof(clk_name), "mmc%d", pdev->id);
+       host->hclk = clk_get(&pdev->dev, clk_name);
+       if (IS_ERR(host->hclk)) {
+               dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name);
+               ret = PTR_ERR(host->hclk);
+               goto clean_up1;
+       }
+       clk_enable(host->hclk);
+       host->clk = clk_get_rate(host->hclk);
+       host->pd = pdev;
+
+       init_waitqueue_head(&host->intr_wait);
+
+       mmc->ops = &sh_mmcif_ops;
+       mmc->f_max = host->clk;
+       /* choose f_min close to 400 kHz */
+       if (mmc->f_max < 51200000)
+               mmc->f_min = mmc->f_max / 128;
+       else if (mmc->f_max < 102400000)
+               mmc->f_min = mmc->f_max / 256;
+       else
+               mmc->f_min = mmc->f_max / 512;
+       if (pd->ocr)
+               mmc->ocr_avail = pd->ocr;
+       mmc->caps = MMC_CAP_MMC_HIGHSPEED;
+       if (pd->caps)
+               mmc->caps |= pd->caps;
+       mmc->max_phys_segs = 128;
+       mmc->max_hw_segs = 128;
+       mmc->max_blk_size = 512;
+       mmc->max_blk_count = 65535;
+       mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+       mmc->max_seg_size = mmc->max_req_size;
+
+       sh_mmcif_sync_reset(host);
+       platform_set_drvdata(pdev, host);
+       mmc_add_host(mmc);
+
+       ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host);
+       if (ret) {
+               pr_err(DRIVER_NAME": request_irq error (sh_mmc:error)\n");
+               goto clean_up2;
+       }
+       ret = request_irq(irq[1], sh_mmcif_intr, 0, "sh_mmc:int", host);
+       if (ret) {
+               free_irq(irq[0], host);
+               pr_err(DRIVER_NAME": request_irq error (sh_mmc:int)\n");
+               goto clean_up2;
+       }
+
+       sh_mmcif_writel(host, MMCIF_CE_INT_MASK, MASK_ALL);
+       sh_mmcif_detect(host->mmc);
+
+       pr_info("%s: driver version %s\n", DRIVER_NAME, DRIVER_VERSION);
+       pr_debug("%s: chip ver H'%04x\n", DRIVER_NAME,
+                       sh_mmcif_readl(host, MMCIF_CE_VERSION) & 0x0000ffff);
+       return ret;
+
+clean_up2:
+       clk_disable(host->hclk);
+clean_up1:
+       mmc_free_host(mmc);
+clean_up:
+       if (reg)
+               iounmap(reg);
+       return ret;
+}
+
+static int __devexit sh_mmcif_remove(struct platform_device *pdev)
+{
+       struct sh_mmcif_host *host = platform_get_drvdata(pdev);
+       int irq[2];
+
+       sh_mmcif_writel(host, MMCIF_CE_INT_MASK, MASK_ALL);
+
+       irq[0] = platform_get_irq(pdev, 0);
+       irq[1] = platform_get_irq(pdev, 1);
+
+       if (host->addr)
+               iounmap(host->addr);
+
+       platform_set_drvdata(pdev, NULL);
+       mmc_remove_host(host->mmc);
+
+       free_irq(irq[0], host);
+       free_irq(irq[1], host);
+
+       clk_disable(host->hclk);
+       mmc_free_host(host->mmc);
+
+       return 0;
+}
+
+static struct platform_driver sh_mmcif_driver = {
+       .probe          = sh_mmcif_probe,
+       .remove         = sh_mmcif_remove,
+       .driver         = {
+               .name   = DRIVER_NAME,
+       },
+};
+
+static int __init sh_mmcif_init(void)
+{
+       return platform_driver_register(&sh_mmcif_driver);
+}
+
+static void __exit sh_mmcif_exit(void)
+{
+       platform_driver_unregister(&sh_mmcif_driver);
+}
+
+module_init(sh_mmcif_init);
+module_exit(sh_mmcif_exit);
+
+
+MODULE_DESCRIPTION("SuperH on-chip MMC/eMMC interface driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS(DRIVER_NAME);
+MODULE_AUTHOR("Yusuke Goda <yusuke.goda.sx@renesas.com>");
index 82554dd..cec9995 100644 (file)
@@ -1032,7 +1032,7 @@ static void tifm_sd_remove(struct tifm_dev *sock)
 
 static int tifm_sd_suspend(struct tifm_dev *sock, pm_message_t state)
 {
-       return mmc_suspend_host(tifm_get_drvdata(sock), state);
+       return mmc_suspend_host(tifm_get_drvdata(sock));
 }
 
 static int tifm_sd_resume(struct tifm_dev *sock)
index 883fcac..ee7d0a5 100644 (file)
@@ -768,7 +768,7 @@ static int tmio_mmc_suspend(struct platform_device *dev, pm_message_t state)
        struct mmc_host *mmc = platform_get_drvdata(dev);
        int ret;
 
-       ret = mmc_suspend_host(mmc, state);
+       ret = mmc_suspend_host(mmc);
 
        /* Tell MFD core it can disable us now.*/
        if (!ret && cell->disable)
index 632858a..19f2d72 100644 (file)
@@ -1280,7 +1280,7 @@ static int via_sd_suspend(struct pci_dev *pcidev, pm_message_t state)
        via_save_pcictrlreg(host);
        via_save_sdcreg(host);
 
-       ret = mmc_suspend_host(host->mmc, state);
+       ret = mmc_suspend_host(host->mmc);
 
        pci_save_state(pcidev);
        pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0);
index 69efe01..0012f5d 100644 (file)
@@ -1819,7 +1819,7 @@ static int wbsd_suspend(struct wbsd_host *host, pm_message_t state)
 {
        BUG_ON(host == NULL);
 
-       return mmc_suspend_host(host->mmc, state);
+       return mmc_suspend_host(host->mmc);
 }
 
 static int wbsd_resume(struct wbsd_host *host)
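
Note the pattern repeated across these hunks: mmc_suspend_host() no longer takes a pm_message_t, so host drivers simply stop forwarding it. A representative suspend hook under the new signature (driver name and drvdata layout are illustrative only):

	static int my_host_suspend(struct platform_device *dev, pm_message_t state)
	{
		struct mmc_host *mmc = platform_get_drvdata(dev);

		/* state is still received from the platform bus, but is
		 * no longer passed down to the MMC core */
		return mmc_suspend_host(mmc);
	}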
index 1586e1c..8bef6d6 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/parport.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
 #include <asm/setup.h>
 #include <asm/amigahw.h>
 #include <asm/irq.h>
@@ -31,7 +33,6 @@
 #define DPRINTK(x...)  do { } while (0)
 #endif
 
-static struct parport *this_port = NULL;
 
 static void amiga_write_data(struct parport *p, unsigned char data)
 {
@@ -227,18 +228,11 @@ static struct parport_operations pp_amiga_ops = {
 
 /* ----------- Initialisation code --------------------------------- */
 
-static int __init parport_amiga_init(void)
+static int __init amiga_parallel_probe(struct platform_device *pdev)
 {
        struct parport *p;
        int err;
 
-       if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(AMI_PARALLEL))
-               return -ENODEV;
-
-       err = -EBUSY;
-       if (!request_mem_region(CIAA_PHYSADDR-1+0x100, 0x100, "parallel"))
-               goto out_mem;
-
        ciaa.ddrb = 0xff;
        ciab.ddra &= 0xf8;
        mb();
@@ -246,41 +240,63 @@ static int __init parport_amiga_init(void)
        p = parport_register_port((unsigned long)&ciaa.prb, IRQ_AMIGA_CIAA_FLG,
                                   PARPORT_DMA_NONE, &pp_amiga_ops);
        if (!p)
-               goto out_port;
+               return -EBUSY;
 
-       err = request_irq(IRQ_AMIGA_CIAA_FLG, parport_irq_handler, 0, p->name, p);
+       err = request_irq(IRQ_AMIGA_CIAA_FLG, parport_irq_handler, 0, p->name,
+                         p);
        if (err)
                goto out_irq;
 
-       this_port = p;
        printk(KERN_INFO "%s: Amiga built-in port using irq\n", p->name);
        /* XXX: set operating mode */
        parport_announce_port(p);
 
+       platform_set_drvdata(pdev, p);
+
        return 0;
 
 out_irq:
        parport_put_port(p);
-out_port:
-       release_mem_region(CIAA_PHYSADDR-1+0x100, 0x100);
-out_mem:
        return err;
 }
 
-static void __exit parport_amiga_exit(void)
+static int __exit amiga_parallel_remove(struct platform_device *pdev)
+{
+       struct parport *port = platform_get_drvdata(pdev);
+
+       parport_remove_port(port);
+       if (port->irq != PARPORT_IRQ_NONE)
+               free_irq(IRQ_AMIGA_CIAA_FLG, port);
+       parport_put_port(port);
+       platform_set_drvdata(pdev, NULL);
+       return 0;
+}
+
+static struct platform_driver amiga_parallel_driver = {
+       .remove = __exit_p(amiga_parallel_remove),
+       .driver   = {
+               .name   = "amiga-parallel",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init amiga_parallel_init(void)
+{
+       return platform_driver_probe(&amiga_parallel_driver,
+                                    amiga_parallel_probe);
+}
+
+module_init(amiga_parallel_init);
+
+static void __exit amiga_parallel_exit(void)
 {
-       parport_remove_port(this_port);
-       if (this_port->irq != PARPORT_IRQ_NONE)
-               free_irq(IRQ_AMIGA_CIAA_FLG, this_port);
-       parport_put_port(this_port);
-       release_mem_region(CIAA_PHYSADDR-1+0x100, 0x100);
+       platform_driver_unregister(&amiga_parallel_driver);
 }
 
+module_exit(amiga_parallel_exit);
 
 MODULE_AUTHOR("Joerg Dorchain <joerg@dorchain.net>");
 MODULE_DESCRIPTION("Parport Driver for Amiga builtin Port");
 MODULE_SUPPORTED_DEVICE("Amiga builtin Parallel Port");
 MODULE_LICENSE("GPL");
-
-module_init(parport_amiga_init)
-module_exit(parport_amiga_exit)
+MODULE_ALIAS("platform:amiga-parallel");
index c32822a..070211a 100644 (file)
@@ -8,3 +8,27 @@ config RAPIDIO_DISC_TIMEOUT
        ---help---
          Amount of time a discovery node waits for a host to complete
          enumeration before giving up.
+
+config RAPIDIO_ENABLE_RX_TX_PORTS
+       bool "Enable RapidIO Input/Output Ports"
+       depends on RAPIDIO
+       ---help---
+         The RapidIO specification describes an Output port transmit
+         enable and an Input port receive enable. The recommended state
+         for both Input and Output ports is disabled. When this switch
+         is set, the RapidIO subsystem will enable all ports for
+         Input/Output direction to allow traffic other than Maintenance
+         transfers.
+
+source "drivers/rapidio/switches/Kconfig"
+
+config RAPIDIO_DEBUG
+       bool "RapidIO subsystem debug messages"
+       depends on RAPIDIO
+       help
+         Say Y here if you want the RapidIO subsystem to produce a bunch of
+         debug messages to the system log. Select this if you are having a
+         problem with the RapidIO subsystem and want to see more of what is
+         going on.
+
+         If you are unsure about this, say N here.
index 7c0e181..b6139fe 100644 (file)
@@ -4,3 +4,7 @@
 obj-y += rio.o rio-access.o rio-driver.o rio-scan.o rio-sysfs.o
 
 obj-$(CONFIG_RAPIDIO)          += switches/
+
+ifeq ($(CONFIG_RAPIDIO_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
index 4541509..5664321 100644 (file)
@@ -4,6 +4,14 @@
  * Copyright 2005 MontaVista Software, Inc.
  * Matt Porter <mporter@kernel.crashing.org>
  *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write/Error Management initialization and handling
+ *
+ * Copyright 2009 Sysgo AG
+ * Thomas Moll <thomas.moll@sysgo.com>
+ * - Added Input- Output- enable functionality, to allow full communication
+ *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
 LIST_HEAD(rio_devices);
 static LIST_HEAD(rio_switches);
 
-#define RIO_ENUM_CMPL_MAGIC    0xdeadbeef
-
 static void rio_enum_timeout(unsigned long);
 
+static void rio_init_em(struct rio_dev *rdev);
+
 DEFINE_SPINLOCK(rio_global_list_lock);
 
 static int next_destid = 0;
 static int next_switchid = 0;
 static int next_net = 0;
+static int next_comptag;
 
 static struct timer_list rio_enum_timer =
 TIMER_INITIALIZER(rio_enum_timeout, 0, 0);
@@ -52,12 +61,6 @@ static int rio_mport_phys_table[] = {
        -1,
 };
 
-static int rio_sport_phys_table[] = {
-       RIO_EFB_PAR_EP_FREE_ID,
-       RIO_EFB_SER_EP_FREE_ID,
-       -1,
-};
-
 /**
  * rio_get_device_id - Get the base/extended device id for a device
  * @port: RIO master port
@@ -118,12 +121,26 @@ static int rio_clear_locks(struct rio_mport *port)
        u32 result;
        int ret = 0;
 
-       /* Write component tag CSR magic complete value */
-       rio_local_write_config_32(port, RIO_COMPONENT_TAG_CSR,
-                                 RIO_ENUM_CMPL_MAGIC);
-       list_for_each_entry(rdev, &rio_devices, global_list)
-           rio_write_config_32(rdev, RIO_COMPONENT_TAG_CSR,
-                               RIO_ENUM_CMPL_MAGIC);
+       /* Assign component tag to all devices */
+       next_comptag = 1;
+       rio_local_write_config_32(port, RIO_COMPONENT_TAG_CSR, next_comptag++);
+
+       list_for_each_entry(rdev, &rio_devices, global_list) {
+               /* Mark device as discovered */
+               rio_read_config_32(rdev,
+                                  rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR,
+                                  &result);
+               rio_write_config_32(rdev,
+                                   rdev->phys_efptr + RIO_PORT_GEN_CTL_CSR,
+                                   result | RIO_PORT_GEN_DISCOVERED);
+
+               rio_write_config_32(rdev, RIO_COMPONENT_TAG_CSR, next_comptag);
+               rdev->comp_tag = next_comptag++;
+               if (next_comptag >= 0x10000) {
+                       pr_err("RIO: Component Tag Counter Overflow\n");
+                       break;
+               }
+       }
 
        /* Release host device id locks */
        rio_local_write_config_32(port, RIO_HOST_DID_LOCK_CSR,
@@ -229,27 +246,37 @@ static int rio_is_switch(struct rio_dev *rdev)
 }
 
 /**
- * rio_route_set_ops- Sets routing operations for a particular vendor switch
+ * rio_switch_init - Sets switch operations for a particular vendor switch
  * @rdev: RIO device
+ * @do_enum: Enumeration/Discovery mode flag
  *
- * Searches the RIO route ops table for known switch types. If the vid
- * and did match a switch table entry, then set the add_entry() and
- * get_entry() ops to the table entry values.
+ * Searches the RIO switch ops table for known switch types. If the vid
+ * and did match a switch table entry, then call the switch initialization
+ * routine to set up switch-specific routines.
  */
-static void rio_route_set_ops(struct rio_dev *rdev)
+static void rio_switch_init(struct rio_dev *rdev, int do_enum)
 {
-       struct rio_route_ops *cur = __start_rio_route_ops;
-       struct rio_route_ops *end = __end_rio_route_ops;
+       struct rio_switch_ops *cur = __start_rio_switch_ops;
+       struct rio_switch_ops *end = __end_rio_switch_ops;
 
        while (cur < end) {
                if ((cur->vid == rdev->vid) && (cur->did == rdev->did)) {
-                       pr_debug("RIO: adding routing ops for %s\n", rio_name(rdev));
-                       rdev->rswitch->add_entry = cur->add_hook;
-                       rdev->rswitch->get_entry = cur->get_hook;
+                       pr_debug("RIO: calling init routine for %s\n",
+                                rio_name(rdev));
+                       cur->init_hook(rdev, do_enum);
+                       break;
                }
                cur++;
        }
 
+       if ((cur >= end) && (rdev->pef & RIO_PEF_STD_RT)) {
+               pr_debug("RIO: adding STD routing ops for %s\n",
+                       rio_name(rdev));
+               rdev->rswitch->add_entry = rio_std_route_add_entry;
+               rdev->rswitch->get_entry = rio_std_route_get_entry;
+               rdev->rswitch->clr_table = rio_std_route_clr_table;
+       }
+
        if (!rdev->rswitch->add_entry || !rdev->rswitch->get_entry)
                printk(KERN_ERR "RIO: missing routing ops for %s\n",
                       rio_name(rdev));
@@ -281,6 +308,65 @@ static int __devinit rio_add_device(struct rio_dev *rdev)
 }
 
 /**
+ * rio_enable_rx_tx_port - enable input receiver and output transmitter of
+ * given port
+ * @port: Master port associated with the RIO network
+ * @local: local=1 selects the local port, otherwise a far-end device is
+ * reached
+ * @destid: Destination ID of the device to check host bit
+ * @hopcount: Number of hops to reach the target
+ * @port_num: Port number to enable on a far-end device (switch port)
+ *
+ * Returns %0 on success or %-EIO if a remote access to the port control
+ * register (EXT_PTR+0x3C) fails
+ */
+inline int rio_enable_rx_tx_port(struct rio_mport *port,
+                                int local, u16 destid,
+                                u8 hopcount, u8 port_num)
+{
+#ifdef CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS
+       u32 regval;
+       u32 ext_ftr_ptr;
+
+       /*
+        * Enable rx input / tx output port
+        */
+       pr_debug("rio_enable_rx_tx_port(local = %d, destid = %d, hopcount = "
+                "%d, port_num = %d)\n", local, destid, hopcount, port_num);
+
+       ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, hopcount);
+
+       if (local) {
+               rio_local_read_config_32(port, ext_ftr_ptr +
+                               RIO_PORT_N_CTL_CSR(0),
+                               &regval);
+       } else {
+               if (rio_mport_read_config_32(port, destid, hopcount,
+               ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), &regval) < 0)
+                       return -EIO;
+       }
+
+       if (regval & RIO_PORT_N_CTL_P_TYP_SER) {
+               /* serial */
+               regval = regval | RIO_PORT_N_CTL_EN_RX_SER
+                               | RIO_PORT_N_CTL_EN_TX_SER;
+       } else {
+               /* parallel */
+               regval = regval | RIO_PORT_N_CTL_EN_RX_PAR
+                               | RIO_PORT_N_CTL_EN_TX_PAR;
+       }
+
+       if (local) {
+               rio_local_write_config_32(port, ext_ftr_ptr +
+                                         RIO_PORT_N_CTL_CSR(0), regval);
+       } else {
+               if (rio_mport_write_config_32(port, destid, hopcount,
+                   ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), regval) < 0)
+                       return -EIO;
+       }
+#endif
+       return 0;
+}
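
For reference, a sketch of how enumeration uses this helper (mirroring the calls added below): the local port is enabled once before scanning, and each far-end switch port is enabled before it is probed.

    /* Sketch: local port first (destid/hopcount ignored when local=1) */
    rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
    /* ...then switch port 'port_num' on a device addressed by hopcount */
    rio_enable_rx_tx_port(mport, 0, RIO_ANY_DESTID(mport->sys_size),
                          hopcount, port_num);
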
+
+/**
  * rio_setup_device- Allocates and sets up a RIO device
  * @net: RIO network
  * @port: Master port to send transactions
@@ -325,8 +411,14 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
        rdev->asm_rev = result >> 16;
        rio_mport_read_config_32(port, destid, hopcount, RIO_PEF_CAR,
                                 &rdev->pef);
-       if (rdev->pef & RIO_PEF_EXT_FEATURES)
+       if (rdev->pef & RIO_PEF_EXT_FEATURES) {
                rdev->efptr = result & 0xffff;
+               rdev->phys_efptr = rio_mport_get_physefb(port, 0, destid,
+                                                        hopcount);
+
+               rdev->em_efptr = rio_mport_get_feature(port, 0, destid,
+                                               hopcount, RIO_EFB_ERR_MGMNT);
+       }
 
        rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR,
                                 &rdev->src_ops);
@@ -349,12 +441,13 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
        if (rio_is_switch(rdev)) {
                rio_mport_read_config_32(port, destid, hopcount,
                                         RIO_SWP_INFO_CAR, &rdev->swpinfo);
-               rswitch = kmalloc(sizeof(struct rio_switch), GFP_KERNEL);
+               rswitch = kzalloc(sizeof(struct rio_switch), GFP_KERNEL);
                if (!rswitch)
                        goto cleanup;
                rswitch->switchid = next_switchid;
                rswitch->hopcount = hopcount;
                rswitch->destid = destid;
+               rswitch->port_ok = 0;
                rswitch->route_table = kzalloc(sizeof(u8)*
                                        RIO_MAX_ROUTE_ENTRIES(port->sys_size),
                                        GFP_KERNEL);
@@ -367,13 +460,22 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
                rdev->rswitch = rswitch;
                dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id,
                             rdev->rswitch->switchid);
-               rio_route_set_ops(rdev);
+               rio_switch_init(rdev, do_enum);
+
+               if (do_enum && rdev->rswitch->clr_table)
+                       rdev->rswitch->clr_table(port, destid, hopcount,
+                                                RIO_GLOBAL_TABLE);
 
                list_add_tail(&rswitch->node, &rio_switches);
 
-       } else
+       } else {
+               if (do_enum)
+                       /* Enable Input/Output Port (transmitter/receiver) */
+                       rio_enable_rx_tx_port(port, 0, destid, hopcount, 0);
+
                dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id,
                             rdev->destid);
+       }
 
        rdev->dev.bus = &rio_bus_type;
 
@@ -414,23 +516,29 @@ cleanup:
  *
  * Reads the port error status CSR for a particular switch port to
  * determine if the port has an active link.  Returns
- * %PORT_N_ERR_STS_PORT_OK if the port is active or %0 if it is
+ * %RIO_PORT_N_ERR_STS_PORT_OK if the port is active or %0 if it is
  * inactive.
  */
 static int
 rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport)
 {
-       u32 result;
+       u32 result = 0;
        u32 ext_ftr_ptr;
 
-       int *entry = rio_sport_phys_table;
-
-       do {
-               if ((ext_ftr_ptr =
-                    rio_mport_get_feature(port, 0, destid, hopcount, *entry)))
+       ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount, 0);
 
+       while (ext_ftr_ptr) {
+               rio_mport_read_config_32(port, destid, hopcount,
+                                        ext_ftr_ptr, &result);
+               result = RIO_GET_BLOCK_ID(result);
+               if ((result == RIO_EFB_SER_EP_FREE_ID) ||
+                   (result == RIO_EFB_SER_EP_FREE_ID_V13P) ||
+                   (result == RIO_EFB_SER_EP_FREC_ID))
                        break;
-       } while (*++entry >= 0);
+
+               ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount,
+                                               ext_ftr_ptr);
+       }
 
        if (ext_ftr_ptr)
                rio_mport_read_config_32(port, destid, hopcount,
@@ -438,7 +546,81 @@ rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport)
                                         RIO_PORT_N_ERR_STS_CSR(sport),
                                         &result);
 
-       return (result & PORT_N_ERR_STS_PORT_OK);
+       return result & RIO_PORT_N_ERR_STS_PORT_OK;
+}
+
+/**
+ * rio_lock_device - Acquires host device lock for specified device
+ * @port: Master port to send transaction
+ * @destid: Destination ID for device/switch
+ * @hopcount: Hopcount to reach switch
+ * @wait_ms: Max wait time in msec (0 = no timeout)
+ *
+ * Attempts to acquire the host device lock for the specified device.
+ * Returns %0 if the device lock is acquired or %-EINVAL if the timeout
+ * expires.
+ */
+static int
+rio_lock_device(struct rio_mport *port, u16 destid, u8 hopcount, int wait_ms)
+{
+       u32 result;
+       int tcnt = 0;
+
+       /* Attempt to acquire device lock */
+       rio_mport_write_config_32(port, destid, hopcount,
+                                 RIO_HOST_DID_LOCK_CSR, port->host_deviceid);
+       rio_mport_read_config_32(port, destid, hopcount,
+                                RIO_HOST_DID_LOCK_CSR, &result);
+
+       while (result != port->host_deviceid) {
+               if (wait_ms != 0 && tcnt == wait_ms) {
+                       pr_debug("RIO: timeout when locking device %x:%x\n",
+                               destid, hopcount);
+                       return -EINVAL;
+               }
+
+               /* Delay a bit */
+               mdelay(1);
+               tcnt++;
+               /* Try to acquire device lock again */
+               rio_mport_write_config_32(port, destid,
+                       hopcount,
+                       RIO_HOST_DID_LOCK_CSR,
+                       port->host_deviceid);
+               rio_mport_read_config_32(port, destid,
+                       hopcount,
+                       RIO_HOST_DID_LOCK_CSR, &result);
+       }
+
+       return 0;
+}
+
+/**
+ * rio_unlock_device - Releases host device lock for specified device
+ * @port: Master port to send transaction
+ * @destid: Destination ID for device/switch
+ * @hopcount: Hopcount to reach switch
+ *
+ * Returns %0 if the device lock is released or %-EINVAL if it fails.
+ */
+static int
+rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount)
+{
+       u32 result;
+
+       /* Release device lock */
+       rio_mport_write_config_32(port, destid,
+                                 hopcount,
+                                 RIO_HOST_DID_LOCK_CSR,
+                                 port->host_deviceid);
+       rio_mport_read_config_32(port, destid, hopcount,
+               RIO_HOST_DID_LOCK_CSR, &result);
+       if ((result & 0xffff) != 0xffff) {
+               pr_debug("RIO: badness when releasing device lock %x:%x\n",
+                        destid, hopcount);
+               return -EINVAL;
+       }
+
+       return 0;
 }
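
Together these implement the lock/access/unlock pattern used by discovery below; a minimal sketch (1000 ms is the timeout used elsewhere in this patch):

    /* Sketch: serialize a switch access against other hosts */
    if (rio_lock_device(port, destid, hopcount, 1000) == 0) {
            /* ... read or modify switch state ... */
            rio_unlock_device(port, destid, hopcount);
    }
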
 
 /**
@@ -448,6 +630,7 @@ rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport)
  * @table: Routing table ID
  * @route_destid: Destination ID to be routed
  * @route_port: Port number to be routed
+ * @lock: lock switch device flag
  *
  * Calls the switch specific add_entry() method to add a route entry
  * on a switch. The route table can be specified using the @table
@@ -456,12 +639,26 @@ rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport)
  * %RIO_GLOBAL_TABLE in @table. Returns %0 on success or %-EINVAL
  * on failure.
  */
-static int rio_route_add_entry(struct rio_mport *mport, struct rio_switch *rswitch,
-                              u16 table, u16 route_destid, u8 route_port)
+static int
+rio_route_add_entry(struct rio_mport *mport, struct rio_switch *rswitch,
+                   u16 table, u16 route_destid, u8 route_port, int lock)
 {
-       return rswitch->add_entry(mport, rswitch->destid,
+       int rc;
+
+       if (lock) {
+               rc = rio_lock_device(mport, rswitch->destid,
+                                    rswitch->hopcount, 1000);
+               if (rc)
+                       return rc;
+       }
+
+       rc = rswitch->add_entry(mport, rswitch->destid,
                                        rswitch->hopcount, table,
                                        route_destid, route_port);
+       if (lock)
+               rio_unlock_device(mport, rswitch->destid, rswitch->hopcount);
+
+       return rc;
 }
 
 /**
@@ -471,6 +668,7 @@ static int rio_route_add_entry(struct rio_mport *mport, struct rio_switch *rswit
  * @table: Routing table ID
  * @route_destid: Destination ID to be routed
  * @route_port: Pointer to read port number into
+ * @lock: lock switch device flag
  *
  * Calls the switch specific get_entry() method to read a route entry
  * in a switch. The route table can be specified using the @table
@@ -481,11 +679,24 @@ static int rio_route_add_entry(struct rio_mport *mport, struct rio_switch *rswit
  */
 static int
 rio_route_get_entry(struct rio_mport *mport, struct rio_switch *rswitch, u16 table,
-                   u16 route_destid, u8 * route_port)
+                   u16 route_destid, u8 *route_port, int lock)
 {
-       return rswitch->get_entry(mport, rswitch->destid,
+       int rc;
+
+       if (lock) {
+               rc = rio_lock_device(mport, rswitch->destid,
+                                    rswitch->hopcount, 1000);
+               if (rc)
+                       return rc;
+       }
+
+       rc = rswitch->get_entry(mport, rswitch->destid,
                                        rswitch->hopcount, table,
                                        route_destid, route_port);
+       if (lock)
+               rio_unlock_device(mport, rswitch->destid, rswitch->hopcount);
+
+       return rc;
 }
 
 /**
@@ -625,14 +836,14 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
                sw_inport = rio_get_swpinfo_inport(port,
                                RIO_ANY_DESTID(port->sys_size), hopcount);
                rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE,
-                                   port->host_deviceid, sw_inport);
+                                   port->host_deviceid, sw_inport, 0);
                rdev->rswitch->route_table[port->host_deviceid] = sw_inport;
 
                for (destid = 0; destid < next_destid; destid++) {
                        if (destid == port->host_deviceid)
                                continue;
                        rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE,
-                                           destid, sw_inport);
+                                           destid, sw_inport, 0);
                        rdev->rswitch->route_table[destid] = sw_inport;
                }
 
@@ -644,8 +855,15 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
                    rio_name(rdev), rdev->vid, rdev->did, num_ports);
                sw_destid = next_destid;
                for (port_num = 0; port_num < num_ports; port_num++) {
-                       if (sw_inport == port_num)
+                       /* Enable Input/Output Port (transmitter/receiver) */
+                       rio_enable_rx_tx_port(port, 0,
+                                             RIO_ANY_DESTID(port->sys_size),
+                                             hopcount, port_num);
+
+                       if (sw_inport == port_num) {
+                               rdev->rswitch->port_ok |= (1 << port_num);
                                continue;
+                       }
 
                        cur_destid = next_destid;
 
@@ -655,10 +873,11 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
                                pr_debug(
                                    "RIO: scanning device on port %d\n",
                                    port_num);
+                               rdev->rswitch->port_ok |= (1 << port_num);
                                rio_route_add_entry(port, rdev->rswitch,
                                                RIO_GLOBAL_TABLE,
                                                RIO_ANY_DESTID(port->sys_size),
-                                               port_num);
+                                               port_num, 0);
 
                                if (rio_enum_peer(net, port, hopcount + 1) < 0)
                                        return -1;
@@ -672,15 +891,35 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
                                                rio_route_add_entry(port, rdev->rswitch,
                                                                    RIO_GLOBAL_TABLE,
                                                                    destid,
-                                                                   port_num);
+                                                                   port_num,
+                                                                   0);
                                                rdev->rswitch->
                                                    route_table[destid] =
                                                    port_num;
                                        }
                                }
+                       } else {
+                               /* If switch supports Error Management,
+                                * set PORT_LOCKOUT bit for unused port
+                                */
+                               if (rdev->em_efptr)
+                                       rio_set_port_lockout(rdev, port_num, 1);
+
+                               rdev->rswitch->port_ok &= ~(1 << port_num);
                        }
                }
 
+               /* Direct Port-write messages to the enumerating host */
+               if ((rdev->src_ops & RIO_SRC_OPS_PORT_WRITE) &&
+                   (rdev->em_efptr)) {
+                       rio_write_config_32(rdev,
+                                       rdev->em_efptr + RIO_EM_PW_TGT_DEVID,
+                                       (port->host_deviceid << 16) |
+                                       (port->sys_size << 15));
+               }
+
+               rio_init_em(rdev);
+
                /* Check for empty switch */
                if (next_destid == sw_destid) {
                        next_destid++;
@@ -700,21 +939,16 @@ static int __devinit rio_enum_peer(struct rio_net *net, struct rio_mport *port,
  * rio_enum_complete- Tests if enumeration of a network is complete
  * @port: Master port to send transaction
  *
- * Tests the Component Tag CSR for presence of the magic enumeration
- * complete flag. Return %1 if enumeration is complete or %0 if
+ * Tests the Component Tag CSR for non-zero value (enumeration
+ * complete flag). Return %1 if enumeration is complete or %0 if
  * enumeration is incomplete.
  */
 static int rio_enum_complete(struct rio_mport *port)
 {
        u32 tag_csr;
-       int ret = 0;
 
        rio_local_read_config_32(port, RIO_COMPONENT_TAG_CSR, &tag_csr);
-
-       if (tag_csr == RIO_ENUM_CMPL_MAGIC)
-               ret = 1;
-
-       return ret;
+       return (tag_csr & 0xffff) ? 1 : 0;
 }
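
An illustrative sketch of how a discovering host can wait on this flag (the driver itself arms rio_enum_timer with a timeout; the busy-wait below is only for illustration):

    /* Sketch: poll until the enumerator writes a non-zero component tag */
    unsigned long to_end = jiffies + CONFIG_RAPIDIO_DISC_TIMEOUT * HZ;
    while (time_before(jiffies, to_end)) {
            if (rio_enum_complete(mport))
                    break;
            mdelay(1);
    }
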
 
 /**
@@ -763,17 +997,21 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid,
                                pr_debug(
                                    "RIO: scanning device on port %d\n",
                                    port_num);
+
+                               rio_lock_device(port, destid, hopcount, 1000);
+
                                for (ndestid = 0;
                                     ndestid < RIO_ANY_DESTID(port->sys_size);
                                     ndestid++) {
                                        rio_route_get_entry(port, rdev->rswitch,
                                                            RIO_GLOBAL_TABLE,
                                                            ndestid,
-                                                           &route_port);
+                                                           &route_port, 0);
                                        if (route_port == port_num)
                                                break;
                                }
 
+                               rio_unlock_device(port, destid, hopcount);
                                if (rio_disc_peer
                                    (net, port, ndestid, hopcount + 1) < 0)
                                        return -1;
@@ -792,7 +1030,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid,
  *
  * Reads the port error status CSR for the master port to
  * determine if the port has an active link.  Returns
- * %PORT_N_ERR_STS_PORT_OK if the  master port is active
+ * %RIO_PORT_N_ERR_STS_PORT_OK if the  master port is active
  * or %0 if it is inactive.
  */
 static int rio_mport_is_active(struct rio_mport *port)
@@ -813,7 +1051,7 @@ static int rio_mport_is_active(struct rio_mport *port)
                                         RIO_PORT_N_ERR_STS_CSR(port->index),
                                         &result);
 
-       return (result & PORT_N_ERR_STS_PORT_OK);
+       return result & RIO_PORT_N_ERR_STS_PORT_OK;
 }
 
 /**
@@ -866,12 +1104,17 @@ static void rio_update_route_tables(struct rio_mport *port)
                                continue;
 
                        if (RIO_INVALID_ROUTE == rswitch->route_table[destid]) {
+                               /* Skip if destid ends in empty switch */
+                               if (rswitch->destid == destid)
+                                       continue;
 
                                sport = rio_get_swpinfo_inport(port,
                                                rswitch->destid, rswitch->hopcount);
 
                                if (rswitch->add_entry) {
-                                       rio_route_add_entry(port, rswitch, RIO_GLOBAL_TABLE, destid, sport);
+                                       rio_route_add_entry(port, rswitch,
+                                               RIO_GLOBAL_TABLE, destid,
+                                               sport, 0);
                                        rswitch->route_table[destid] = sport;
                                }
                        }
@@ -880,6 +1123,32 @@ static void rio_update_route_tables(struct rio_mport *port)
 }
 
 /**
+ * rio_init_em - Initializes RIO Error Management (for switches)
+ * @rdev: RIO device (switch) to initialize
+ *
+ * For an enumerated switch, calls the device-specific error management
+ * initialization routine (if supplied by the switch driver).
+ */
+static void rio_init_em(struct rio_dev *rdev)
+{
+       if (rio_is_switch(rdev) && (rdev->em_efptr) &&
+           (rdev->rswitch->em_init)) {
+               rdev->rswitch->em_init(rdev);
+       }
+}
+
+/**
+ * rio_pw_enable - Enables/disables port-write handling by a master port
+ * @port: Master port associated with port-write handling
+ * @enable: 1=enable, 0=disable
+ */
+static void rio_pw_enable(struct rio_mport *port, int enable)
+{
+       if (port->ops->pwenable)
+               port->ops->pwenable(port, enable);
+}
+
+/**
  * rio_enum_mport- Start enumeration through a master port
  * @mport: Master port to send transactions
  *
@@ -911,6 +1180,10 @@ int __devinit rio_enum_mport(struct rio_mport *mport)
                        rc = -ENOMEM;
                        goto out;
                }
+
+               /* Enable Input/Output Port (transmitter/receiver) */
+               rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
+
                if (rio_enum_peer(net, mport, 0) < 0) {
                        /* A higher priority host won enumeration, bail. */
                        printk(KERN_INFO
@@ -922,6 +1195,7 @@ int __devinit rio_enum_mport(struct rio_mport *mport)
                }
                rio_update_route_tables(mport);
                rio_clear_locks(mport);
+               rio_pw_enable(mport, 1);
        } else {
                printk(KERN_INFO "RIO: master port %d link inactive\n",
                       mport->id);
@@ -945,15 +1219,22 @@ static void rio_build_route_tables(void)
        u8 sport;
 
        list_for_each_entry(rdev, &rio_devices, global_list)
-           if (rio_is_switch(rdev))
-               for (i = 0;
-                    i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size);
-                    i++) {
-                       if (rio_route_get_entry
-                           (rdev->net->hport, rdev->rswitch, RIO_GLOBAL_TABLE,
-                            i, &sport) < 0)
-                               continue;
-                       rdev->rswitch->route_table[i] = sport;
+               if (rio_is_switch(rdev)) {
+                       rio_lock_device(rdev->net->hport, rdev->rswitch->destid,
+                                       rdev->rswitch->hopcount, 1000);
+                       for (i = 0;
+                            i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size);
+                            i++) {
+                               if (rio_route_get_entry
+                                   (rdev->net->hport, rdev->rswitch,
+                                    RIO_GLOBAL_TABLE, i, &sport, 0) < 0)
+                                       continue;
+                               rdev->rswitch->route_table[i] = sport;
+                       }
+
+                       rio_unlock_device(rdev->net->hport,
+                                         rdev->rswitch->destid,
+                                         rdev->rswitch->hopcount);
                }
 }
 
@@ -1012,6 +1293,13 @@ int __devinit rio_disc_mport(struct rio_mport *mport)
                del_timer_sync(&rio_enum_timer);
 
                pr_debug("done\n");
+
+               /* Read DestID assigned by enumerator */
+               rio_local_read_config_32(mport, RIO_DID_CSR,
+                                        &mport->host_deviceid);
+               mport->host_deviceid = RIO_GET_DID(mport->sys_size,
+                                                  mport->host_deviceid);
+
                if (rio_disc_peer(net, mport, RIO_ANY_DESTID(mport->sys_size),
                                        0) < 0) {
                        printk(KERN_INFO
index 6395c78..777e099 100644 (file)
@@ -5,6 +5,10 @@
  * Copyright 2005 MontaVista Software, Inc.
  * Matt Porter <mporter@kernel.crashing.org>
  *
+ * Copyright 2009 Integrated Device Technology, Inc.
+ * Alex Bounine <alexandre.bounine@idt.com>
+ * - Added Port-Write/Error Management initialization and handling
+ *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
@@ -333,6 +337,328 @@ int rio_release_outb_dbell(struct rio_dev *rdev, struct resource *res)
 }
 
 /**
+ * rio_request_inb_pwrite - request inbound port-write message service
+ * @rdev: RIO device to which the inbound port-write callback is registered
+ * @pwcback: Callback routine to execute when port-write is received
+ *
+ * Binds a port-write callback function to the RapidIO device.
+ * Returns 0 if the request has been satisfied.
+ */
+int rio_request_inb_pwrite(struct rio_dev *rdev,
+       int (*pwcback)(struct rio_dev *rdev, union rio_pw_msg *msg, int step))
+{
+       int rc = 0;
+
+       spin_lock(&rio_global_list_lock);
+       if (rdev->pwcback != NULL)
+               rc = -ENOMEM;
+       else
+               rdev->pwcback = pwcback;
+
+       spin_unlock(&rio_global_list_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(rio_request_inb_pwrite);
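
A hedged usage sketch: an endpoint driver binding a port-write callback (my_pw_handler is hypothetical; returning 0 tells rio_inb_pwrite_handler() that the message is fully handled):

    /* Sketch: hypothetical endpoint driver port-write callback */
    static int my_pw_handler(struct rio_dev *rdev, union rio_pw_msg *msg,
                             int step)
    {
            pr_debug("RIO: PW from %s\n", rio_name(rdev));
            return 0;       /* no standard processing required */
    }

    /* ... in the driver's probe path ... */
    if (rio_request_inb_pwrite(rdev, my_pw_handler))
            pr_debug("RIO: PW callback already registered for %s\n",
                     rio_name(rdev));
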
+
+/**
+ * rio_release_inb_pwrite - release inbound port-write message service
+ * @rdev: RIO device which registered for inbound port-write callback
+ *
+ * Removes callback from the rio_dev structure. Returns 0 if the request
+ * has been satisfied.
+ */
+int rio_release_inb_pwrite(struct rio_dev *rdev)
+{
+       int rc = -ENOMEM;
+
+       spin_lock(&rio_global_list_lock);
+       if (rdev->pwcback) {
+               rdev->pwcback = NULL;
+               rc = 0;
+       }
+
+       spin_unlock(&rio_global_list_lock);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(rio_release_inb_pwrite);
+
+/**
+ * rio_mport_get_physefb - Helper function that returns register offset
+ *                      for Physical Layer Extended Features Block.
+ * @port: Master port to issue transaction
+ * @local: Indicate a local master port or remote device access
+ * @destid: Destination ID of the device
+ * @hopcount: Number of switch hops to the device
+ */
+u32
+rio_mport_get_physefb(struct rio_mport *port, int local,
+                     u16 destid, u8 hopcount)
+{
+       u32 ext_ftr_ptr;
+       u32 ftr_header;
+
+       ext_ftr_ptr = rio_mport_get_efb(port, local, destid, hopcount, 0);
+
+       while (ext_ftr_ptr)  {
+               if (local)
+                       rio_local_read_config_32(port, ext_ftr_ptr,
+                                                &ftr_header);
+               else
+                       rio_mport_read_config_32(port, destid, hopcount,
+                                                ext_ftr_ptr, &ftr_header);
+
+               ftr_header = RIO_GET_BLOCK_ID(ftr_header);
+               switch (ftr_header) {
+
+               case RIO_EFB_SER_EP_ID_V13P:
+               case RIO_EFB_SER_EP_REC_ID_V13P:
+               case RIO_EFB_SER_EP_FREE_ID_V13P:
+               case RIO_EFB_SER_EP_ID:
+               case RIO_EFB_SER_EP_REC_ID:
+               case RIO_EFB_SER_EP_FREE_ID:
+               case RIO_EFB_SER_EP_FREC_ID:
+
+                       return ext_ftr_ptr;
+
+               default:
+                       break;
+               }
+
+               ext_ftr_ptr = rio_mport_get_efb(port, local, destid,
+                                               hopcount, ext_ftr_ptr);
+       }
+
+       return ext_ftr_ptr;
+}
+
+/**
+ * rio_get_comptag - Begin or continue searching for a RIO device by component tag
+ * @comp_tag: RIO component tag to match
+ * @from: Previous RIO device found in search, or %NULL for new search
+ *
+ * Iterates through the list of known RIO devices. If a RIO device is
+ * found with a matching @comp_tag, a pointer to its device
+ * structure is returned. Otherwise, %NULL is returned. A new search
+ * is initiated by passing %NULL to the @from argument. Otherwise, if
+ * @from is not %NULL, searches continue from next device on the global
+ * list.
+ */
+static struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from)
+{
+       struct list_head *n;
+       struct rio_dev *rdev;
+
+       spin_lock(&rio_global_list_lock);
+       n = from ? from->global_list.next : rio_devices.next;
+
+       while (n && (n != &rio_devices)) {
+               rdev = rio_dev_g(n);
+               if (rdev->comp_tag == comp_tag)
+                       goto exit;
+               n = n->next;
+       }
+       rdev = NULL;
+exit:
+       spin_unlock(&rio_global_list_lock);
+       return rdev;
+}
+
+/**
+ * rio_set_port_lockout - Sets/clears LOCKOUT bit (RIO EM 1.3) for a switch port.
+ * @rdev: Pointer to RIO device control structure
+ * @pnum: Switch port number to set LOCKOUT bit
+ * @lock: Operation: set (=1) or clear (=0)
+ */
+int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock)
+{
+       u8 hopcount = 0xff;
+       u16 destid = rdev->destid;
+       u32 regval;
+
+       if (rdev->rswitch) {
+               destid = rdev->rswitch->destid;
+               hopcount = rdev->rswitch->hopcount;
+       }
+
+       rio_mport_read_config_32(rdev->net->hport, destid, hopcount,
+                                rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum),
+                                &regval);
+       if (lock)
+               regval |= RIO_PORT_N_CTL_LOCKOUT;
+       else
+               regval &= ~RIO_PORT_N_CTL_LOCKOUT;
+
+       rio_mport_write_config_32(rdev->net->hport, destid, hopcount,
+                                 rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum),
+                                 regval);
+       return 0;
+}
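
Usage sketch, mirroring the enumeration change earlier in this patch: fence off a port with no attached device when the switch supports error management.

    /* Sketch: lock out an unused switch port during enumeration */
    if (rdev->em_efptr)
            rio_set_port_lockout(rdev, port_num, 1);
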
+
+/**
+ * rio_inb_pwrite_handler - process inbound port-write message
+ * @pw_msg: pointer to inbound port-write message
+ *
+ * Processes an inbound port-write message. Returns 0 if the request
+ * has been satisfied.
+ */
+int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg)
+{
+       struct rio_dev *rdev;
+       struct rio_mport *mport;
+       u8 hopcount;
+       u16 destid;
+       u32 err_status;
+       int rc, portnum;
+
+       rdev = rio_get_comptag(pw_msg->em.comptag, NULL);
+       if (rdev == NULL) {
+               /* Something bad here (probably enumeration error) */
+               pr_err("RIO: %s No matching device for CTag 0x%08x\n",
+                       __func__, pw_msg->em.comptag);
+               return -EIO;
+       }
+
+       pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev));
+
+#ifdef DEBUG_PW
+       {
+               u32 i;
+
+               for (i = 0; i < RIO_PW_MSG_SIZE / sizeof(u32); i += 4)
+                       pr_debug("0x%02x: %08x %08x %08x %08x\n",
+                                i * 4, pw_msg->raw[i], pw_msg->raw[i + 1],
+                                pw_msg->raw[i + 2], pw_msg->raw[i + 3]);
+       }
+#endif
+
+       /* Call an external service function (if one is registered for this
+        * device). This may be the service for endpoints that send
+        * device-specific port-write messages. End-point messages are
+        * expected to be handled completely by the EP-specific device driver.
+        * For switches, rc==0 signals that no standard processing is
+        * required.
+        */
+       if (rdev->pwcback != NULL) {
+               rc = rdev->pwcback(rdev, pw_msg, 0);
+               if (rc == 0)
+                       return 0;
+       }
+
+       /* For End-point devices processing stops here */
+       if (!(rdev->pef & RIO_PEF_SWITCH))
+               return 0;
+
+       if (rdev->phys_efptr == 0) {
+               pr_err("RIO_PW: Bad switch initialization for %s\n",
+                       rio_name(rdev));
+               return 0;
+       }
+
+       mport = rdev->net->hport;
+       destid = rdev->rswitch->destid;
+       hopcount = rdev->rswitch->hopcount;
+
+       /*
+        * Process the port-write notification from switch
+        */
+
+       portnum = pw_msg->em.is_port & 0xFF;
+
+       if (rdev->rswitch->em_handle)
+               rdev->rswitch->em_handle(rdev, portnum);
+
+       rio_mport_read_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
+                       &err_status);
+       pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status);
+
+       if (pw_msg->em.errdetect) {
+               pr_debug("RIO_PW: RIO_EM_P%d_ERR_DETECT=0x%08x\n",
+                        portnum, pw_msg->em.errdetect);
+               /* Clear EM Port N Error Detect CSR */
+               rio_mport_write_config_32(mport, destid, hopcount,
+                       rdev->em_efptr + RIO_EM_PN_ERR_DETECT(portnum), 0);
+       }
+
+       if (pw_msg->em.ltlerrdet) {
+               pr_debug("RIO_PW: RIO_EM_LTL_ERR_DETECT=0x%08x\n",
+                        pw_msg->em.ltlerrdet);
+               /* Clear EM L/T Layer Error Detect CSR */
+               rio_mport_write_config_32(mport, destid, hopcount,
+                       rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0);
+       }
+
+       /* Clear Port Errors */
+       rio_mport_write_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
+                       err_status & RIO_PORT_N_ERR_STS_CLR_MASK);
+
+       if (rdev->rswitch->port_ok & (1 << portnum)) {
+               if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) {
+                       rdev->rswitch->port_ok &= ~(1 << portnum);
+                       rio_set_port_lockout(rdev, portnum, 1);
+
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr +
+                                       RIO_PORT_N_ACK_STS_CSR(portnum),
+                               RIO_PORT_N_ACK_CLEAR);
+
+                       /* Schedule Extraction Service */
+                       pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n",
+                              rio_name(rdev), portnum);
+               }
+       } else {
+               if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) {
+                       rdev->rswitch->port_ok |= (1 << portnum);
+                       rio_set_port_lockout(rdev, portnum, 0);
+
+                       /* Schedule Insertion Service */
+                       pr_debug("RIO_PW: Device Insertion on [%s]-P%d\n",
+                              rio_name(rdev), portnum);
+               }
+       }
+
+       /* Clear Port-Write Pending bit */
+       rio_mport_write_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
+                       RIO_PORT_N_ERR_STS_PW_PEND);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rio_inb_pwrite_handler);
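
A sketch of the expected call site, assuming an mport driver that queues inbound port-write messages (get_next_pw_msg() is hypothetical; the fsl_rio.c changes in this series provide a real implementation):

    /* Sketch: deferred port-write dispatch in an mport driver */
    static void my_pw_dpc(struct work_struct *work)
    {
            union rio_pw_msg *msg;

            while ((msg = get_next_pw_msg()) != NULL)   /* hypothetical */
                    rio_inb_pwrite_handler(msg);
    }
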
+
+/**
+ * rio_mport_get_efb - get pointer to next extended features block
+ * @port: Master port to issue transaction
+ * @local: Indicate a local master port or remote device access
+ * @destid: Destination ID of the device
+ * @hopcount: Number of switch hops to the device
+ * @from: Offset of the current Extended Feature block header (if 0, start
+ * from ExtFeaturePtr)
+ */
+u32
+rio_mport_get_efb(struct rio_mport *port, int local, u16 destid,
+                     u8 hopcount, u32 from)
+{
+       u32 reg_val;
+
+       if (from == 0) {
+               if (local)
+                       rio_local_read_config_32(port, RIO_ASM_INFO_CAR,
+                                                &reg_val);
+               else
+                       rio_mport_read_config_32(port, destid, hopcount,
+                                                RIO_ASM_INFO_CAR, &reg_val);
+               return reg_val & RIO_EXT_FTR_PTR_MASK;
+       } else {
+               if (local)
+                       rio_local_read_config_32(port, from, &reg_val);
+               else
+                       rio_mport_read_config_32(port, destid, hopcount,
+                                                from, &reg_val);
+               return RIO_GET_BLOCK_ID(reg_val);
+       }
+}
+
+/**
  * rio_mport_get_feature - query for devices' extended features
  * @port: Master port to issue transaction
  * @local: Indicate a local master port or remote device access
@@ -451,6 +777,111 @@ struct rio_dev *rio_get_device(u16 vid, u16 did, struct rio_dev *from)
        return rio_get_asm(vid, did, RIO_ANY_ID, RIO_ANY_ID, from);
 }
 
+/**
+ * rio_std_route_add_entry - Add switch route table entry using standard
+ *   registers defined in RIO specification rev.1.3
+ * @mport: Master port to issue transaction
+ * @destid: Destination ID of the device
+ * @hopcount: Number of switch hops to the device
+ * @table: routing table ID (global or port-specific)
+ * @route_destid: destID entry in the RT
+ * @route_port: destination port for specified destID
+ */
+int rio_std_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 route_port)
+{
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR,
+                               (u32)route_destid);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR,
+                               (u32)route_port);
+       }
+
+       udelay(10);
+       return 0;
+}
+
+/**
+ * rio_std_route_get_entry - Read switch route table entry (port number)
+ *   associated with specified destID using standard registers defined in RIO
+ *   specification rev.1.3
+ * @mport: Master port to issue transaction
+ * @destid: Destination ID of the device
+ * @hopcount: Number of switch hops to the device
+ * @table: routing table ID (global or port-specific)
+ * @route_destid: destID entry in the RT
+ * @route_port: returned destination port for specified destID
+ */
+int rio_std_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 *route_port)
+{
+       u32 result;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid);
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR, &result);
+
+               *route_port = (u8)result;
+       }
+
+       return 0;
+}
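
A sketch combining the two standard-CSR helpers: program a global route entry, then read it back to verify.

    /* Sketch: write a route via the standard CSRs and verify it */
    u8 rport;

    rio_std_route_add_entry(mport, destid, hopcount, RIO_GLOBAL_TABLE,
                            route_destid, route_port);
    rio_std_route_get_entry(mport, destid, hopcount, RIO_GLOBAL_TABLE,
                            route_destid, &rport);
    if (rport != route_port)
            pr_debug("RIO: route readback mismatch\n");
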
+
+/**
+ * rio_std_route_clr_table - Clear switch route table using standard registers
+ *   defined in RIO specification rev.1.3.
+ * @mport: Master port to issue transaction
+ * @destid: Destination ID of the device
+ * @hopcount: Number of switch hops to the device
+ * @table: routing table ID (global or port-specific)
+ */
+int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table)
+{
+       u32 max_destid = 0xff;
+       u32 i, pef, id_inc = 1, ext_cfg = 0;
+       u32 port_sel = RIO_INVALID_ROUTE;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_read_config_32(mport, destid, hopcount,
+                                        RIO_PEF_CAR, &pef);
+
+               if (mport->sys_size) {
+                       rio_mport_read_config_32(mport, destid, hopcount,
+                                                RIO_SWITCH_RT_LIMIT,
+                                                &max_destid);
+                       max_destid &= RIO_RT_MAX_DESTID;
+               }
+
+               if (pef & RIO_PEF_EXT_RT) {
+                       ext_cfg = 0x80000000;
+                       id_inc = 4;
+                       port_sel = (RIO_INVALID_ROUTE << 24) |
+                                  (RIO_INVALID_ROUTE << 16) |
+                                  (RIO_INVALID_ROUTE << 8) |
+                                  RIO_INVALID_ROUTE;
+               }
+
+               for (i = 0; i <= max_destid;) {
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                       RIO_STD_RTE_CONF_DESTID_SEL_CSR,
+                                       ext_cfg | i);
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                       RIO_STD_RTE_CONF_PORT_SEL_CSR,
+                                       port_sel);
+                       i += id_inc;
+               }
+       }
+
+       udelay(10);
+       return 0;
+}
+
 static void rio_fixup_device(struct rio_dev *dev)
 {
 }
index 7786d02..f27b7a9 100644 (file)
 
 extern u32 rio_mport_get_feature(struct rio_mport *mport, int local, u16 destid,
                                 u8 hopcount, int ftr);
+extern u32 rio_mport_get_physefb(struct rio_mport *port, int local,
+                                u16 destid, u8 hopcount);
+extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid,
+                            u8 hopcount, u32 from);
 extern int rio_create_sysfs_dev_files(struct rio_dev *rdev);
 extern int rio_enum_mport(struct rio_mport *mport);
 extern int rio_disc_mport(struct rio_mport *mport);
+extern int rio_std_route_add_entry(struct rio_mport *mport, u16 destid,
+                                  u8 hopcount, u16 table, u16 route_destid,
+                                  u8 route_port);
+extern int rio_std_route_get_entry(struct rio_mport *mport, u16 destid,
+                                  u8 hopcount, u16 table, u16 route_destid,
+                                  u8 *route_port);
+extern int rio_std_route_clr_table(struct rio_mport *mport, u16 destid,
+                                  u8 hopcount, u16 table);
+extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock);
 
 /* Structures internal to the RIO core code */
 extern struct device_attribute rio_dev_attrs[];
 extern spinlock_t rio_global_list_lock;
 
-extern struct rio_route_ops __start_rio_route_ops[];
-extern struct rio_route_ops __end_rio_route_ops[];
+extern struct rio_switch_ops __start_rio_switch_ops[];
+extern struct rio_switch_ops __end_rio_switch_ops[];
 
 /* Helpers internal to the RIO core code */
-#define DECLARE_RIO_ROUTE_SECTION(section, vid, did, add_hook, get_hook)  \
-       static struct rio_route_ops __rio_route_ops __used   \
-       __section(section)= { vid, did, add_hook, get_hook };
+#define DECLARE_RIO_SWITCH_SECTION(section, name, vid, did, init_hook) \
+       static const struct rio_switch_ops __rio_switch_##name __used \
+       __section(section) = { vid, did, init_hook };
 
 /**
- * DECLARE_RIO_ROUTE_OPS - Registers switch routing operations
+ * DECLARE_RIO_SWITCH_INIT - Registers switch initialization routine
  * @vid: RIO vendor ID
  * @did: RIO device ID
- * @add_hook: Callback that adds a route entry
- * @get_hook: Callback that gets a route entry
+ * @init_hook: Callback that performs switch-specific initialization
  *
- * Manipulating switch route tables in RIO is switch specific. This
- * registers a switch by vendor and device ID with two callbacks for
- * modifying and retrieving route entries in a switch. A &struct
- * rio_route_ops is initialized with the ops and placed into a
- * RIO-specific kernel section.
+ * Manipulating switch route tables and error management in RIO
+ * is switch specific. This registers a switch by vendor and device ID with
+ * initialization callback for setting up switch operations and (if required)
+ * hardware initialization. A &struct rio_switch_ops is initialized with
+ * pointer to the init routine and placed into a RIO-specific kernel section.
  */
-#define DECLARE_RIO_ROUTE_OPS(vid, did, add_hook, get_hook)            \
-       DECLARE_RIO_ROUTE_SECTION(.rio_route_ops,                       \
-                       vid, did, add_hook, get_hook)
+#define DECLARE_RIO_SWITCH_INIT(vid, did, init_hook)           \
+       DECLARE_RIO_SWITCH_SECTION(.rio_switch_ops, vid##did, \
+                       vid, did, init_hook)
 
 #define RIO_GET_DID(size, x)   (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16))
 #define RIO_SET_DID(size, x)   (size ? (x & 0xffff) : ((x & 0x000000ff) << 16))
diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig
new file mode 100644 (file)
index 0000000..2b4e9b2
--- /dev/null
@@ -0,0 +1,28 @@
+#
+# RapidIO switches configuration
+#
+config RAPIDIO_TSI57X
+       bool "IDT Tsi57x SRIO switches support"
+       depends on RAPIDIO
+       ---help---
+         Includes support for IDT Tsi57x family of serial RapidIO switches.
+
+config RAPIDIO_CPS_XX
+       bool "IDT CPS-xx SRIO switches support"
+       depends on RAPIDIO
+       ---help---
+         Includes support for IDT CPS-16/12/10/8 serial RapidIO switches.
+
+config RAPIDIO_TSI568
+       bool "Tsi568 SRIO switch support"
+       depends on RAPIDIO
+       default n
+       ---help---
+         Includes support for IDT Tsi568 serial RapidIO switch.
+
+config RAPIDIO_TSI500
+       bool "Tsi500 Parallel RapidIO switch support"
+       depends on RAPIDIO
+       default n
+       ---help---
+         Includes support for IDT Tsi500 parallel RapidIO switch.
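
An illustrative .config fragment enabling the new switch drivers (select only the switches present on the target system):

    CONFIG_RAPIDIO=y
    CONFIG_RAPIDIO_TSI57X=y
    CONFIG_RAPIDIO_CPS_XX=y
    # CONFIG_RAPIDIO_TSI568 is not set
    # CONFIG_RAPIDIO_TSI500 is not set
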
index b924f83..fe4adc3 100644 (file)
@@ -2,4 +2,11 @@
 # Makefile for RIO switches
 #
 
-obj-$(CONFIG_RAPIDIO)  += tsi500.o
+obj-$(CONFIG_RAPIDIO_TSI57X)   += tsi57x.o
+obj-$(CONFIG_RAPIDIO_CPS_XX)   += idtcps.o
+obj-$(CONFIG_RAPIDIO_TSI568)   += tsi568.o
+obj-$(CONFIG_RAPIDIO_TSI500)   += tsi500.o
+
+ifeq ($(CONFIG_RAPIDIO_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c
new file mode 100644 (file)
index 0000000..2c790c1
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * IDT CPS RapidIO switches support
+ *
+ * Copyright 2009-2010 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/rio_ids.h>
+#include "../rio.h"
+
+#define CPS_DEFAULT_ROUTE      0xde
+#define CPS_NO_ROUTE           0xdf
+
+#define IDTCPS_RIO_DOMAIN 0xf20020
+
+static int
+idtcps_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 route_port)
+{
+       u32 result;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid);
+
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR, &result);
+
+               result = (0xffffff00 & result) | (u32)route_port;
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR, result);
+       }
+
+       return 0;
+}
+
+static int
+idtcps_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 *route_port)
+{
+       u32 result;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid);
+
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR, &result);
+
+               if (CPS_DEFAULT_ROUTE == (u8)result ||
+                   CPS_NO_ROUTE == (u8)result)
+                       *route_port = RIO_INVALID_ROUTE;
+               else
+                       *route_port = (u8)result;
+       }
+
+       return 0;
+}
+
+static int
+idtcps_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table)
+{
+       u32 i;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               for (i = 0x80000000; i <= 0x800000ff;) {
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_DESTID_SEL_CSR, i);
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               RIO_STD_RTE_CONF_PORT_SEL_CSR,
+                               (CPS_DEFAULT_ROUTE << 24) |
+                               (CPS_DEFAULT_ROUTE << 16) |
+                               (CPS_DEFAULT_ROUTE << 8) | CPS_DEFAULT_ROUTE);
+                       i += 4;
+               }
+       }
+
+       return 0;
+}
+
+static int
+idtcps_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u8 sw_domain)
+{
+       /*
+        * Switch domain configuration operates only at global level
+        */
+       rio_mport_write_config_32(mport, destid, hopcount,
+                                 IDTCPS_RIO_DOMAIN, (u32)sw_domain);
+       return 0;
+}
+
+static int
+idtcps_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u8 *sw_domain)
+{
+       u32 regval;
+
+       /*
+        * Switch domain configuration operates only at global level
+        */
+       rio_mport_read_config_32(mport, destid, hopcount,
+                               IDTCPS_RIO_DOMAIN, &regval);
+
+       *sw_domain = (u8)(regval & 0xff);
+
+       return 0;
+}
+
+static int idtcps_switch_init(struct rio_dev *rdev, int do_enum)
+{
+       pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
+       rdev->rswitch->add_entry = idtcps_route_add_entry;
+       rdev->rswitch->get_entry = idtcps_route_get_entry;
+       rdev->rswitch->clr_table = idtcps_route_clr_table;
+       rdev->rswitch->set_domain = idtcps_set_domain;
+       rdev->rswitch->get_domain = idtcps_get_domain;
+       rdev->rswitch->em_init = NULL;
+       rdev->rswitch->em_handle = NULL;
+
+       return 0;
+}
+
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS6Q, idtcps_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS8, idtcps_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS10Q, idtcps_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS12, idtcps_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDTCPS16, idtcps_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_IDT, RIO_DID_IDT70K200, idtcps_switch_init);
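
The domain hooks registered above are reached through the switch ops rather than called directly; a hedged sketch of a caller:

    /* Sketch: set the switch domain via the ops bound in switch_init */
    if (rdev->rswitch->set_domain)
            rdev->rswitch->set_domain(mport, destid, hopcount, sw_domain);
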
index c77c23b..914eddd 100644 (file)
@@ -1,6 +1,10 @@
 /*
  * RapidIO Tsi500 switch support
  *
+ * Copyright 2009-2010 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ *  - Modified switch operations initialization.
+ *
  * Copyright 2005 MontaVista Software, Inc.
  * Matt Porter <mporter@kernel.crashing.org>
  *
@@ -57,4 +61,18 @@ tsi500_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, u16 tab
        return ret;
 }
 
-DECLARE_RIO_ROUTE_OPS(RIO_VID_TUNDRA, RIO_DID_TSI500, tsi500_route_add_entry, tsi500_route_get_entry);
+static int tsi500_switch_init(struct rio_dev *rdev, int do_enum)
+{
+       pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
+       rdev->rswitch->add_entry = tsi500_route_add_entry;
+       rdev->rswitch->get_entry = tsi500_route_get_entry;
+       rdev->rswitch->clr_table = NULL;
+       rdev->rswitch->set_domain = NULL;
+       rdev->rswitch->get_domain = NULL;
+       rdev->rswitch->em_init = NULL;
+       rdev->rswitch->em_handle = NULL;
+
+       return 0;
+}
+
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI500, tsi500_switch_init);
diff --git a/drivers/rapidio/switches/tsi568.c b/drivers/rapidio/switches/tsi568.c
new file mode 100644 (file)
index 0000000..f7fd789
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * RapidIO Tsi568 switch support
+ *
+ * Copyright 2009-2010 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ *  - Added EM support
+ *  - Modified switch operations initialization.
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/rio_ids.h>
+#include <linux/delay.h>
+#include "../rio.h"
+
+/* Global (broadcast) route registers */
+#define SPBC_ROUTE_CFG_DESTID  0x10070
+#define SPBC_ROUTE_CFG_PORT    0x10074
+
+/* Per port route registers */
+#define SPP_ROUTE_CFG_DESTID(n)        (0x11070 + 0x100*(n))
+#define SPP_ROUTE_CFG_PORT(n)  (0x11074 + 0x100*(n))
+
+#define TSI568_SP_MODE_BC      0x10004
+#define  TSI568_SP_MODE_PW_DIS 0x08000000
+
+static int
+tsi568_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 route_port)
+{
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPBC_ROUTE_CFG_DESTID, route_destid);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPBC_ROUTE_CFG_PORT, route_port);
+       } else {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPP_ROUTE_CFG_DESTID(table),
+                                       route_destid);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPP_ROUTE_CFG_PORT(table), route_port);
+       }
+
+       udelay(10);
+
+       return 0;
+}
+
+static int
+tsi568_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 *route_port)
+{
+       int ret = 0;
+       u32 result;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPBC_ROUTE_CFG_DESTID, route_destid);
+               rio_mport_read_config_32(mport, destid, hopcount,
+                                       SPBC_ROUTE_CFG_PORT, &result);
+       } else {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPP_ROUTE_CFG_DESTID(table),
+                                       route_destid);
+               rio_mport_read_config_32(mport, destid, hopcount,
+                                       SPP_ROUTE_CFG_PORT(table), &result);
+       }
+
+       *route_port = result;
+       if (*route_port > 15)
+               ret = -1;
+
+       return ret;
+}
+
+static int
+tsi568_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table)
+{
+       u32 route_idx;
+       u32 lut_size;
+
+       lut_size = (mport->sys_size) ? 0x1ff : 0xff;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPBC_ROUTE_CFG_DESTID, 0x80000000);
+               for (route_idx = 0; route_idx <= lut_size; route_idx++)
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                               SPBC_ROUTE_CFG_PORT,
+                                               RIO_INVALID_ROUTE);
+       } else {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                       SPP_ROUTE_CFG_DESTID(table),
+                                       0x80000000);
+               for (route_idx = 0; route_idx <= lut_size; route_idx++)
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                               SPP_ROUTE_CFG_PORT(table),
+                                               RIO_INVALID_ROUTE);
+       }
+
+       return 0;
+}
+
+static int
+tsi568_em_init(struct rio_dev *rdev)
+{
+       struct rio_mport *mport = rdev->net->hport;
+       u16 destid = rdev->rswitch->destid;
+       u8 hopcount = rdev->rswitch->hopcount;
+       u32 regval;
+
+       pr_debug("TSI568 %s [%d:%d]\n", __func__, destid, hopcount);
+
+       /* Make sure that Port-Writes are disabled (for all ports) */
+       rio_mport_read_config_32(mport, destid, hopcount,
+                       TSI568_SP_MODE_BC, &regval);
+       rio_mport_write_config_32(mport, destid, hopcount,
+                       TSI568_SP_MODE_BC, regval | TSI568_SP_MODE_PW_DIS);
+
+       return 0;
+}
+
+static int tsi568_switch_init(struct rio_dev *rdev, int do_enum)
+{
+       pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
+       rdev->rswitch->add_entry = tsi568_route_add_entry;
+       rdev->rswitch->get_entry = tsi568_route_get_entry;
+       rdev->rswitch->clr_table = tsi568_route_clr_table;
+       rdev->rswitch->set_domain = NULL;
+       rdev->rswitch->get_domain = NULL;
+       rdev->rswitch->em_init = tsi568_em_init;
+       rdev->rswitch->em_handle = NULL;
+
+       return 0;
+}
+
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI568, tsi568_switch_init);
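
The switch_init hook above is the whole per-device contract: DECLARE_RIO_SWITCH_INIT() binds a vendor/device ID pair to a callback that fills in the rswitch operation pointers, and the RapidIO enumeration core then drives routing through those pointers. A minimal caller sketch against the signatures shown above (rio_route_one() itself is an illustrative helper, not a kernel API):

	/* Program one global-table route on an enumerated switch. */
	static int rio_route_one(struct rio_dev *rdev, u16 destid, u8 port)
	{
		struct rio_mport *mport = rdev->net->hport;

		return rdev->rswitch->add_entry(mport, rdev->rswitch->destid,
						rdev->rswitch->hopcount,
						RIO_GLOBAL_TABLE, destid,
						port);
	}
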
diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c
new file mode 100644
index 0000000..d34df72
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ * RapidIO Tsi57x switch family support
+ *
+ * Copyright 2009-2010 Integrated Device Technology, Inc.
+ * Alexandre Bounine <alexandre.bounine@idt.com>
+ *  - Added EM support
+ *  - Modified switch operations initialization.
+ *
+ * Copyright 2005 MontaVista Software, Inc.
+ * Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/rio.h>
+#include <linux/rio_drv.h>
+#include <linux/rio_ids.h>
+#include <linux/delay.h>
+#include "../rio.h"
+
+/* Global (broadcast) route registers */
+#define SPBC_ROUTE_CFG_DESTID  0x10070
+#define SPBC_ROUTE_CFG_PORT    0x10074
+
+/* Per port route registers */
+#define SPP_ROUTE_CFG_DESTID(n)        (0x11070 + 0x100*(n))
+#define SPP_ROUTE_CFG_PORT(n)  (0x11074 + 0x100*(n))
+
+#define TSI578_SP_MODE(n)      (0x11004 + (n)*0x100)
+#define TSI578_SP_MODE_GLBL    0x10004
+#define  TSI578_SP_MODE_PW_DIS 0x08000000
+#define  TSI578_SP_MODE_LUT_512        0x01000000
+
+#define TSI578_SP_CTL_INDEP(n) (0x13004 + (n)*0x100)
+#define TSI578_SP_LUT_PEINF(n) (0x13010 + (n)*0x100)
+#define TSI578_SP_CS_TX(n)     (0x13014 + (n)*0x100)
+#define TSI578_SP_INT_STATUS(n) (0x13018 + (n)*0x100)
+
+#define TSI578_GLBL_ROUTE_BASE 0x10078
+
+static int
+tsi57x_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 route_port)
+{
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                         SPBC_ROUTE_CFG_DESTID, route_destid);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                         SPBC_ROUTE_CFG_PORT, route_port);
+       } else {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_DESTID(table), route_destid);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_PORT(table), route_port);
+       }
+
+       udelay(10);
+
+       return 0;
+}
+
+static int
+tsi57x_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table, u16 route_destid, u8 *route_port)
+{
+       int ret = 0;
+       u32 result;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               /* use the local route table of the ingress port to
+                * avoid a possible race condition */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                       RIO_SWP_INFO_CAR, &result);
+               table = (result & RIO_SWP_INFO_PORT_NUM_MASK);
+       }
+
+       rio_mport_write_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_DESTID(table), route_destid);
+       rio_mport_read_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_PORT(table), &result);
+
+       *route_port = (u8)result;
+       if (*route_port > 15)
+               ret = -1;
+
+       return ret;
+}
+
+static int
+tsi57x_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u16 table)
+{
+       u32 route_idx;
+       u32 lut_size;
+
+       lut_size = (mport->sys_size) ? 0x1ff : 0xff;
+
+       if (table == RIO_GLOBAL_TABLE) {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                                         SPBC_ROUTE_CFG_DESTID, 0x80000000);
+               for (route_idx = 0; route_idx <= lut_size; route_idx++)
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                                 SPBC_ROUTE_CFG_PORT,
+                                                 RIO_INVALID_ROUTE);
+       } else {
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_DESTID(table), 0x80000000);
+               for (route_idx = 0; route_idx <= lut_size; route_idx++)
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               SPP_ROUTE_CFG_PORT(table), RIO_INVALID_ROUTE);
+       }
+
+       return 0;
+}
+
+static int
+tsi57x_set_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u8 sw_domain)
+{
+       u32 regval;
+
+       /*
+        * Switch domain configuration operates only at global level
+        */
+
+       /* Turn off flat (LUT_512) mode */
+       rio_mport_read_config_32(mport, destid, hopcount,
+                                TSI578_SP_MODE_GLBL, &regval);
+       rio_mport_write_config_32(mport, destid, hopcount, TSI578_SP_MODE_GLBL,
+                                 regval & ~TSI578_SP_MODE_LUT_512);
+       /* Set switch domain base */
+       rio_mport_write_config_32(mport, destid, hopcount,
+                                 TSI578_GLBL_ROUTE_BASE,
+                                 (u32)(sw_domain << 24));
+       return 0;
+}
+
+static int
+tsi57x_get_domain(struct rio_mport *mport, u16 destid, u8 hopcount,
+                      u8 *sw_domain)
+{
+       u32 regval;
+
+       /*
+        * Switch domain configuration operates only at global level
+        */
+       rio_mport_read_config_32(mport, destid, hopcount,
+                               TSI578_GLBL_ROUTE_BASE, &regval);
+
+       *sw_domain = (u8)(regval >> 24);
+
+       return 0;
+}
+
+static int
+tsi57x_em_init(struct rio_dev *rdev)
+{
+       struct rio_mport *mport = rdev->net->hport;
+       u16 destid = rdev->rswitch->destid;
+       u8 hopcount = rdev->rswitch->hopcount;
+       u32 regval;
+       int portnum;
+
+       pr_debug("TSI578 %s [%d:%d]\n", __func__, destid, hopcount);
+
+       for (portnum = 0; portnum < 16; portnum++) {
+               /* Make sure that Port-Writes are enabled (for all ports) */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               TSI578_SP_MODE(portnum), &regval);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               TSI578_SP_MODE(portnum),
+                               regval & ~TSI578_SP_MODE_PW_DIS);
+
+               /* Clear all pending interrupts */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr +
+                                       RIO_PORT_N_ERR_STS_CSR(portnum),
+                               &regval);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr +
+                                       RIO_PORT_N_ERR_STS_CSR(portnum),
+                               regval & 0x07120214);
+
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               TSI578_SP_INT_STATUS(portnum), &regval);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               TSI578_SP_INT_STATUS(portnum),
+                               regval & 0x000700bd);
+
+               /* Enable all interrupts to allow ports to send a port-write */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               TSI578_SP_CTL_INDEP(portnum), &regval);
+               rio_mport_write_config_32(mport, destid, hopcount,
+                               TSI578_SP_CTL_INDEP(portnum),
+                               regval | 0x000b0000);
+
+               /* Skip next (odd) port if the current port is in x4 mode */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum),
+                               &regval);
+               if ((regval & RIO_PORT_N_CTL_PWIDTH) == RIO_PORT_N_CTL_PWIDTH_4)
+                       portnum++;
+       }
+
+       return 0;
+}
+
+static int
+tsi57x_em_handler(struct rio_dev *rdev, u8 portnum)
+{
+       struct rio_mport *mport = rdev->net->hport;
+       u16 destid = rdev->rswitch->destid;
+       u8 hopcount = rdev->rswitch->hopcount;
+       u32 intstat, err_status;
+       int sendcount, checkcount;
+       u8 route_port;
+       u32 regval;
+
+       rio_mport_read_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
+                       &err_status);
+
+       if ((err_status & RIO_PORT_N_ERR_STS_PORT_OK) &&
+           (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES |
+                         RIO_PORT_N_ERR_STS_PW_INP_ES))) {
+               /* Remove any queued packets by locking/unlocking port */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum),
+                       &regval);
+               if (!(regval & RIO_PORT_N_CTL_LOCKOUT)) {
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum),
+                               regval | RIO_PORT_N_CTL_LOCKOUT);
+                       udelay(50);
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                               rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum),
+                               regval);
+               }
+
+               /* Read from link maintenance response register to clear
+                * valid bit
+                */
+               rio_mport_read_config_32(mport, destid, hopcount,
+                       rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(portnum),
+                       &regval);
+
+               /* Send a Packet-Not-Accepted/Link-Request-Input-Status control
+                * symbol to recover from IES/OES
+                */
+               sendcount = 3;
+               while (sendcount) {
+                       rio_mport_write_config_32(mport, destid, hopcount,
+                                         TSI578_SP_CS_TX(portnum), 0x40fc8000);
+                       checkcount = 3;
+                       while (checkcount--) {
+                               udelay(50);
+                               rio_mport_read_config_32(
+                                       mport, destid, hopcount,
+                                       rdev->phys_efptr +
+                                               RIO_PORT_N_MNT_RSP_CSR(portnum),
+                                       &regval);
+                               if (regval & RIO_PORT_N_MNT_RSP_RVAL)
+                                       goto exit_es;
+                       }
+
+                       sendcount--;
+               }
+       }
+
+exit_es:
+       /* Clear implementation specific error status bits */
+       rio_mport_read_config_32(mport, destid, hopcount,
+                                TSI578_SP_INT_STATUS(portnum), &intstat);
+       pr_debug("TSI578[%x:%x] SP%d_INT_STATUS=0x%08x\n",
+                destid, hopcount, portnum, intstat);
+
+       if (intstat & 0x10000) {
+               rio_mport_read_config_32(mport, destid, hopcount,
+                               TSI578_SP_LUT_PEINF(portnum), &regval);
+               regval = (mport->sys_size) ? (regval >> 16) : (regval >> 24);
+               route_port = rdev->rswitch->route_table[regval];
+               pr_debug("RIO: TSI578[%s] P%d LUT Parity Error (destID=%d)\n",
+                       rio_name(rdev), portnum, regval);
+               tsi57x_route_add_entry(mport, destid, hopcount,
+                               RIO_GLOBAL_TABLE, regval, route_port);
+       }
+
+       rio_mport_write_config_32(mport, destid, hopcount,
+                                 TSI578_SP_INT_STATUS(portnum),
+                                 intstat & 0x000700bd);
+
+       return 0;
+}
+
+static int tsi57x_switch_init(struct rio_dev *rdev, int do_enum)
+{
+       pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev));
+       rdev->rswitch->add_entry = tsi57x_route_add_entry;
+       rdev->rswitch->get_entry = tsi57x_route_get_entry;
+       rdev->rswitch->clr_table = tsi57x_route_clr_table;
+       rdev->rswitch->set_domain = tsi57x_set_domain;
+       rdev->rswitch->get_domain = tsi57x_get_domain;
+       rdev->rswitch->em_init = tsi57x_em_init;
+       rdev->rswitch->em_handle = tsi57x_em_handler;
+
+       return 0;
+}
+
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI572, tsi57x_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI574, tsi57x_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI577, tsi57x_switch_init);
+DECLARE_RIO_SWITCH_INIT(RIO_VID_TUNDRA, RIO_DID_TSI578, tsi57x_switch_init);
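
A note on the bit layout above: tsi57x_em_handler() recovers the failing destID from TSI578_SP_LUT_PEINF by shifting right 16 on large (sys_size) networks and 24 otherwise, i.e. the register carries a 16- or 8-bit destID left-aligned in its top bits, and tsi57x_set_domain() likewise places the domain number in bits 31:24 of TSI578_GLBL_ROUTE_BASE. Under the usual hierarchical-routing reading (an assumption, not spelled out in the patch), the upper byte of a 16-bit destID names the switch domain and the lower byte indexes within it:

	/* Sketch of the assumed destID split in hierarchical mode */
	static inline u8 destid_domain(u16 destid) { return destid >> 8; }
	static inline u8 destid_index(u16 destid) { return destid & 0xff; }
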
index f159832..10ba12c 100644
@@ -611,6 +611,13 @@ config RTC_DRV_AB3100
          Select this to enable the ST-Ericsson AB3100 Mixed Signal IC RTC
          support. This chip contains a battery- and capacitor-backed RTC.
 
+config RTC_DRV_AB8500
+       tristate "ST-Ericsson AB8500 RTC"
+       depends on AB8500_CORE
+       help
+         Select this to enable the ST-Ericsson AB8500 power management IC RTC
+         support. This chip contains a battery- and capacitor-backed RTC.
+
 config RTC_DRV_NUC900
        tristate "NUC910/NUC920 RTC driver"
        depends on RTC_CLASS && ARCH_W90X900
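
The new entry is only visible once the AB8500 core MFD support from the same series is enabled, so a .config fragment that builds the RTC driver as a module would look something like:

	CONFIG_AB8500_CORE=y
	CONFIG_RTC_CLASS=y
	CONFIG_RTC_DRV_AB8500=m
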
index 245311a..5adbba7 100644
@@ -18,6 +18,7 @@ rtc-core-$(CONFIG_RTC_INTF_SYSFS) += rtc-sysfs.o
 # Keep the list ordered.
 
 obj-$(CONFIG_RTC_DRV_AB3100)   += rtc-ab3100.o
+obj-$(CONFIG_RTC_DRV_AB8500)   += rtc-ab8500.o
 obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
 obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o
 obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
new file mode 100644
index 0000000..2fda031
--- /dev/null
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ * Author: Virupax Sadashivpetimath <virupax.sadashivpetimath@stericsson.com>
+ *
+ * RTC clock driver for the RTC part of the AB8500 Power management chip.
+ * Based on RTC clock driver for the AB3100 Analog Baseband Chip by
+ * Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/mfd/ab8500.h>
+#include <linux/delay.h>
+
+#define AB8500_RTC_SOFF_STAT_REG       0x0F00
+#define AB8500_RTC_CC_CONF_REG         0x0F01
+#define AB8500_RTC_READ_REQ_REG                0x0F02
+#define AB8500_RTC_WATCH_TSECMID_REG   0x0F03
+#define AB8500_RTC_WATCH_TSECHI_REG    0x0F04
+#define AB8500_RTC_WATCH_TMIN_LOW_REG  0x0F05
+#define AB8500_RTC_WATCH_TMIN_MID_REG  0x0F06
+#define AB8500_RTC_WATCH_TMIN_HI_REG   0x0F07
+#define AB8500_RTC_ALRM_MIN_LOW_REG    0x0F08
+#define AB8500_RTC_ALRM_MIN_MID_REG    0x0F09
+#define AB8500_RTC_ALRM_MIN_HI_REG     0x0F0A
+#define AB8500_RTC_STAT_REG            0x0F0B
+#define AB8500_RTC_BKUP_CHG_REG                0x0F0C
+#define AB8500_RTC_FORCE_BKUP_REG      0x0F0D
+#define AB8500_RTC_CALIB_REG           0x0F0E
+#define AB8500_RTC_SWITCH_STAT_REG     0x0F0F
+#define AB8500_REV_REG                 0x1080
+
+/* RtcReadRequest bits */
+#define RTC_READ_REQUEST               0x01
+#define RTC_WRITE_REQUEST              0x02
+
+/* RtcCtrl bits */
+#define RTC_ALARM_ENA                  0x04
+#define RTC_STATUS_DATA                        0x01
+
+#define COUNTS_PER_SEC                 (0xF000 / 60)
+#define AB8500_RTC_EPOCH               2000
+
+static const unsigned long ab8500_rtc_time_regs[] = {
+       AB8500_RTC_WATCH_TMIN_HI_REG, AB8500_RTC_WATCH_TMIN_MID_REG,
+       AB8500_RTC_WATCH_TMIN_LOW_REG, AB8500_RTC_WATCH_TSECHI_REG,
+       AB8500_RTC_WATCH_TSECMID_REG
+};
+
+static const unsigned long ab8500_rtc_alarm_regs[] = {
+       AB8500_RTC_ALRM_MIN_HI_REG, AB8500_RTC_ALRM_MIN_MID_REG,
+       AB8500_RTC_ALRM_MIN_LOW_REG
+};
+
+/* Calculate the number of seconds from 1970-01-01 to 01-01 of the given year */
+static unsigned long get_elapsed_seconds(int year)
+{
+       unsigned long secs;
+       struct rtc_time tm = {
+               .tm_year = year - 1900,
+               .tm_mday = 1,
+       };
+
+       /*
+        * rtc_tm_to_time() counts seconds from 1970, not from 1900,
+        * even though tm_year is supplied as an offset from 1900.
+        */
+       rtc_tm_to_time(&tm, &secs);
+       return secs;
+}
+
+static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
+       unsigned long timeout = jiffies + HZ;
+       int retval, i;
+       unsigned long mins, secs;
+       unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)];
+
+       /* Request a data read */
+       retval = ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG,
+                             RTC_READ_REQUEST);
+       if (retval < 0)
+               return retval;
+
+       /* Early AB8500 chips will not clear the rtc read request bit */
+       if (ab8500->revision == 0) {
+               msleep(1);
+       } else {
+               /* Wait for the AB8500 to clear the read request bit */
+               while (time_before(jiffies, timeout)) {
+                       retval = ab8500_read(ab8500, AB8500_RTC_READ_REQ_REG);
+                       if (retval < 0)
+                               return retval;
+
+                       if (!(retval & RTC_READ_REQUEST))
+                               break;
+
+                       msleep(1);
+               }
+       }
+
+       /* Read the Watchtime registers */
+       for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) {
+               retval = ab8500_read(ab8500, ab8500_rtc_time_regs[i]);
+               if (retval < 0)
+                       return retval;
+               buf[i] = retval;
+       }
+
+       mins = (buf[0] << 16) | (buf[1] << 8) | buf[2];
+
+       secs =  (buf[3] << 8) | buf[4];
+       secs =  secs / COUNTS_PER_SEC;
+       secs =  secs + (mins * 60);
+
+       /* Add back the initially subtracted number of seconds */
+       secs += get_elapsed_seconds(AB8500_RTC_EPOCH);
+
+       rtc_time_to_tm(secs, tm);
+       return rtc_valid_tm(tm);
+}
+
+static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
+       int retval, i;
+       unsigned char buf[ARRAY_SIZE(ab8500_rtc_time_regs)];
+       unsigned long no_secs, no_mins, secs = 0;
+
+       if (tm->tm_year < (AB8500_RTC_EPOCH - 1900)) {
+               dev_dbg(dev, "year should be equal to or greater than %d\n",
+                               AB8500_RTC_EPOCH);
+               return -EINVAL;
+       }
+
+       /* Get the number of seconds since 1970 */
+       rtc_tm_to_time(tm, &secs);
+
+       /*
+        * Convert it to the number of seconds since 01-01-2000 00:00:00, since
+        * we only have a small counter in the RTC.
+        */
+       secs -= get_elapsed_seconds(AB8500_RTC_EPOCH);
+
+       no_mins = secs / 60;
+
+       no_secs = secs % 60;
+       /* Scale the seconds to the RTC counter resolution */
+       no_secs = no_secs * COUNTS_PER_SEC;
+
+       buf[4] = no_secs & 0xFF;
+       buf[3] = (no_secs >> 8) & 0xFF;
+
+       buf[2] = no_mins & 0xFF;
+       buf[1] = (no_mins >> 8) & 0xFF;
+       buf[0] = (no_mins >> 16) & 0xFF;
+
+       for (i = 0; i < ARRAY_SIZE(ab8500_rtc_time_regs); i++) {
+               retval = ab8500_write(ab8500, ab8500_rtc_time_regs[i], buf[i]);
+               if (retval < 0)
+                       return retval;
+       }
+
+       /* Request a data write */
+       return ab8500_write(ab8500, AB8500_RTC_READ_REQ_REG, RTC_WRITE_REQUEST);
+}
+
+static int ab8500_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
+       int retval, i;
+       int rtc_ctrl;
+       unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)];
+       unsigned long secs, mins;
+
+       /* Check if the alarm is enabled or not */
+       rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG);
+       if (rtc_ctrl < 0)
+               return rtc_ctrl;
+
+       if (rtc_ctrl & RTC_ALARM_ENA)
+               alarm->enabled = 1;
+       else
+               alarm->enabled = 0;
+
+       alarm->pending = 0;
+
+       for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) {
+               retval = ab8500_read(ab8500, ab8500_rtc_alarm_regs[i]);
+               if (retval < 0)
+                       return retval;
+               buf[i] = retval;
+       }
+
+       mins = (buf[0] << 16) | (buf[1] << 8) | (buf[2]);
+       secs = mins * 60;
+
+       /* Add back the initially subtracted number of seconds */
+       secs += get_elapsed_seconds(AB8500_RTC_EPOCH);
+
+       rtc_time_to_tm(secs, &alarm->time);
+
+       return rtc_valid_tm(&alarm->time);
+}
+
+static int ab8500_rtc_irq_enable(struct device *dev, unsigned int enabled)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
+
+       return ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_ALARM_ENA,
+                              enabled ? RTC_ALARM_ENA : 0);
+}
+
+static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(dev->parent);
+       int retval, i;
+       unsigned char buf[ARRAY_SIZE(ab8500_rtc_alarm_regs)];
+       unsigned long mins, secs = 0;
+
+       if (alarm->time.tm_year < (AB8500_RTC_EPOCH - 1900)) {
+               dev_dbg(dev, "year should be equal to or greater than %d\n",
+                               AB8500_RTC_EPOCH);
+               return -EINVAL;
+       }
+
+       /* Get the number of seconds since 1970 */
+       rtc_tm_to_time(&alarm->time, &secs);
+
+       /*
+        * Convert it to the number of seconds since 01-01-2000 00:00:00, since
+        * we only have a small counter in the RTC.
+        */
+       secs -= get_elapsed_seconds(AB8500_RTC_EPOCH);
+
+       mins = secs / 60;
+
+       buf[2] = mins & 0xFF;
+       buf[1] = (mins >> 8) & 0xFF;
+       buf[0] = (mins >> 16) & 0xFF;
+
+       /* Set the alarm time */
+       for (i = 0; i < ARRAY_SIZE(ab8500_rtc_alarm_regs); i++) {
+               retval = ab8500_write(ab8500, ab8500_rtc_alarm_regs[i], buf[i]);
+               if (retval < 0)
+                       return retval;
+       }
+
+       return ab8500_rtc_irq_enable(dev, alarm->enabled);
+}
+
+static irqreturn_t rtc_alarm_handler(int irq, void *data)
+{
+       struct rtc_device *rtc = data;
+       unsigned long events = RTC_IRQF | RTC_AF;
+
+       dev_dbg(&rtc->dev, "%s\n", __func__);
+       rtc_update_irq(rtc, 1, events);
+
+       return IRQ_HANDLED;
+}
+
+static const struct rtc_class_ops ab8500_rtc_ops = {
+       .read_time              = ab8500_rtc_read_time,
+       .set_time               = ab8500_rtc_set_time,
+       .read_alarm             = ab8500_rtc_read_alarm,
+       .set_alarm              = ab8500_rtc_set_alarm,
+       .alarm_irq_enable       = ab8500_rtc_irq_enable,
+};
+
+static int __devinit ab8500_rtc_probe(struct platform_device *pdev)
+{
+       struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent);
+       int err;
+       struct rtc_device *rtc;
+       int rtc_ctrl;
+       int irq;
+
+       irq = platform_get_irq_byname(pdev, "ALARM");
+       if (irq < 0)
+               return irq;
+
+       /* For RTC supply test */
+       err = ab8500_set_bits(ab8500, AB8500_RTC_STAT_REG, RTC_STATUS_DATA,
+                       RTC_STATUS_DATA);
+       if (err < 0)
+               return err;
+
+       /* Wait for reset by the PorRtc */
+       msleep(1);
+
+       rtc_ctrl = ab8500_read(ab8500, AB8500_RTC_STAT_REG);
+       if (rtc_ctrl < 0)
+               return rtc_ctrl;
+
+       /* Check whether the RTC supply failed */
+       if (!(rtc_ctrl & RTC_STATUS_DATA)) {
+               dev_err(&pdev->dev, "RTC supply failure\n");
+               return -ENODEV;
+       }
+
+       rtc = rtc_device_register("ab8500-rtc", &pdev->dev, &ab8500_rtc_ops,
+                       THIS_MODULE);
+       if (IS_ERR(rtc)) {
+               dev_err(&pdev->dev, "Registration failed\n");
+               err = PTR_ERR(rtc);
+               return err;
+       }
+
+       err = request_threaded_irq(irq, NULL, rtc_alarm_handler, 0,
+                                  "ab8500-rtc", rtc);
+       if (err < 0) {
+               rtc_device_unregister(rtc);
+               return err;
+       }
+
+       platform_set_drvdata(pdev, rtc);
+
+       return 0;
+}
+
+static int __devexit ab8500_rtc_remove(struct platform_device *pdev)
+{
+       struct rtc_device *rtc = platform_get_drvdata(pdev);
+       int irq = platform_get_irq_byname(pdev, "ALARM");
+
+       free_irq(irq, rtc);
+       rtc_device_unregister(rtc);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+static struct platform_driver ab8500_rtc_driver = {
+       .driver = {
+               .name = "ab8500-rtc",
+               .owner = THIS_MODULE,
+       },
+       .probe  = ab8500_rtc_probe,
+       .remove = __devexit_p(ab8500_rtc_remove),
+};
+
+static int __init ab8500_rtc_init(void)
+{
+       return platform_driver_register(&ab8500_rtc_driver);
+}
+
+static void __exit ab8500_rtc_exit(void)
+{
+       platform_driver_unregister(&ab8500_rtc_driver);
+}
+
+module_init(ab8500_rtc_init);
+module_exit(ab8500_rtc_exit);
+MODULE_AUTHOR("Virupax Sadashivpetimath <virupax.sadashivpetimath@stericsson.com>");
+MODULE_DESCRIPTION("AB8500 RTC Driver");
+MODULE_LICENSE("GPL v2");
index 038095d..6dc4e62 100644
@@ -595,10 +595,6 @@ static void wdt_disable(void)
 static ssize_t wdt_write(struct file *file, const char __user *buf,
                         size_t count, loff_t *ppos)
 {
-       /*  Can't seek (pwrite) on this device
-       if (ppos != &file->f_pos)
-       return -ESPIPE;
-       */
        if (count) {
                wdt_ping();
                return 1;
@@ -707,7 +703,7 @@ static int wdt_open(struct inode *inode, struct file *file)
                 */
                wdt_is_open = 1;
                unlock_kernel();
-               return 0;
+               return nonseekable_open(inode, file);
        }
        return -ENODEV;
 }
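
This hunk is the standard idiom for character devices whose read/write ignore the file position: rather than open-coding a ppos check (the commented-out block deleted above), mark the file non-seekable at open time. In isolation (device name hypothetical):

	static int mydev_open(struct inode *inode, struct file *file)
	{
		/* refuse lseek()/pread()/pwrite() up front instead of
		 * checking ppos in every read/write handler
		 */
		return nonseekable_open(inode, file);
	}
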
index 308541f..1bb5d3f 100644
@@ -1,34 +1,31 @@
 #include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/zorro.h>
 
-#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
-#include <linux/zorro.h>
-#include <asm/irq.h>
-#include <linux/spinlock.h>
 
 #include "scsi.h"
-#include <scsi/scsi_host.h>
 #include "wd33c93.h"
 #include "a2091.h"
 
-#include <linux/stat.h>
-
 
-static int a2091_release(struct Scsi_Host *instance);
+struct a2091_hostdata {
+       struct WD33C93_hostdata wh;
+       struct a2091_scsiregs *regs;
+};
 
 static irqreturn_t a2091_intr(int irq, void *data)
 {
        struct Scsi_Host *instance = data;
-       a2091_scsiregs *regs = (a2091_scsiregs *)(instance->base);
-       unsigned int status = regs->ISTR;
+       struct a2091_hostdata *hdata = shost_priv(instance);
+       unsigned int status = hdata->regs->ISTR;
        unsigned long flags;
 
        if (!(status & (ISTR_INT_F | ISTR_INT_P)) || !(status & ISTR_INTS))
@@ -43,38 +40,39 @@ static irqreturn_t a2091_intr(int irq, void *data)
 static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
        struct Scsi_Host *instance = cmd->device->host;
-       struct WD33C93_hostdata *hdata = shost_priv(instance);
-       a2091_scsiregs *regs = (a2091_scsiregs *)(instance->base);
+       struct a2091_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct a2091_scsiregs *regs = hdata->regs;
        unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
        unsigned long addr = virt_to_bus(cmd->SCp.ptr);
 
        /* don't allow DMA if the physical address is bad */
        if (addr & A2091_XFER_MASK) {
-               hdata->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
-               hdata->dma_bounce_buffer = kmalloc(hdata->dma_bounce_len,
-                                                  GFP_KERNEL);
+               wh->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
+               wh->dma_bounce_buffer = kmalloc(wh->dma_bounce_len,
+                                               GFP_KERNEL);
 
                /* can't allocate memory; use PIO */
-               if (!hdata->dma_bounce_buffer) {
-                       hdata->dma_bounce_len = 0;
+               if (!wh->dma_bounce_buffer) {
+                       wh->dma_bounce_len = 0;
                        return 1;
                }
 
                /* get the physical address of the bounce buffer */
-               addr = virt_to_bus(hdata->dma_bounce_buffer);
+               addr = virt_to_bus(wh->dma_bounce_buffer);
 
                /* the bounce buffer may not be in the first 16M of physmem */
                if (addr & A2091_XFER_MASK) {
                        /* we could use chipmem... maybe later */
-                       kfree(hdata->dma_bounce_buffer);
-                       hdata->dma_bounce_buffer = NULL;
-                       hdata->dma_bounce_len = 0;
+                       kfree(wh->dma_bounce_buffer);
+                       wh->dma_bounce_buffer = NULL;
+                       wh->dma_bounce_len = 0;
                        return 1;
                }
 
                if (!dir_in) {
                        /* copy to bounce buffer for a write */
-                       memcpy(hdata->dma_bounce_buffer, cmd->SCp.ptr,
+                       memcpy(wh->dma_bounce_buffer, cmd->SCp.ptr,
                               cmd->SCp.this_residual);
                }
        }
@@ -84,7 +82,7 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
                cntr |= CNTR_DDIR;
 
        /* remember direction */
-       hdata->dma_dir = dir_in;
+       wh->dma_dir = dir_in;
 
        regs->CNTR = cntr;
 
@@ -108,20 +106,21 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
                     int status)
 {
-       struct WD33C93_hostdata *hdata = shost_priv(instance);
-       a2091_scsiregs *regs = (a2091_scsiregs *)(instance->base);
+       struct a2091_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct a2091_scsiregs *regs = hdata->regs;
 
        /* disable SCSI interrupts */
        unsigned short cntr = CNTR_PDMD;
 
-       if (!hdata->dma_dir)
+       if (!wh->dma_dir)
                cntr |= CNTR_DDIR;
 
        /* disable SCSI interrupts */
        regs->CNTR = cntr;
 
        /* flush if we were reading */
-       if (hdata->dma_dir) {
+       if (wh->dma_dir) {
                regs->FLUSH = 1;
                while (!(regs->ISTR & ISTR_FE_FLG))
                        ;
@@ -137,95 +136,37 @@ static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
        regs->CNTR = CNTR_PDMD | CNTR_INTEN;
 
        /* copy from a bounce buffer, if necessary */
-       if (status && hdata->dma_bounce_buffer) {
-               if (hdata->dma_dir)
-                       memcpy(SCpnt->SCp.ptr, hdata->dma_bounce_buffer,
+       if (status && wh->dma_bounce_buffer) {
+               if (wh->dma_dir)
+                       memcpy(SCpnt->SCp.ptr, wh->dma_bounce_buffer,
                               SCpnt->SCp.this_residual);
-               kfree(hdata->dma_bounce_buffer);
-               hdata->dma_bounce_buffer = NULL;
-               hdata->dma_bounce_len = 0;
-       }
-}
-
-static int __init a2091_detect(struct scsi_host_template *tpnt)
-{
-       static unsigned char called = 0;
-       struct Scsi_Host *instance;
-       unsigned long address;
-       struct zorro_dev *z = NULL;
-       wd33c93_regs wdregs;
-       a2091_scsiregs *regs;
-       struct WD33C93_hostdata *hdata;
-       int num_a2091 = 0;
-
-       if (!MACH_IS_AMIGA || called)
-               return 0;
-       called = 1;
-
-       tpnt->proc_name = "A2091";
-       tpnt->proc_info = &wd33c93_proc_info;
-
-       while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
-               if (z->id != ZORRO_PROD_CBM_A590_A2091_1 &&
-                   z->id != ZORRO_PROD_CBM_A590_A2091_2)
-                       continue;
-               address = z->resource.start;
-               if (!request_mem_region(address, 256, "wd33c93"))
-                       continue;
-
-               instance = scsi_register(tpnt, sizeof(struct WD33C93_hostdata));
-               if (instance == NULL)
-                       goto release;
-               instance->base = ZTWO_VADDR(address);
-               instance->irq = IRQ_AMIGA_PORTS;
-               instance->unique_id = z->slotaddr;
-               regs = (a2091_scsiregs *)(instance->base);
-               regs->DAWR = DAWR_A2091;
-               wdregs.SASR = &regs->SASR;
-               wdregs.SCMD = &regs->SCMD;
-               hdata = shost_priv(instance);
-               hdata->no_sync = 0xff;
-               hdata->fast = 0;
-               hdata->dma_mode = CTRL_DMA;
-               wd33c93_init(instance, wdregs, dma_setup, dma_stop,
-                            WD33C93_FS_8_10);
-               if (request_irq(IRQ_AMIGA_PORTS, a2091_intr, IRQF_SHARED,
-                               "A2091 SCSI", instance))
-                       goto unregister;
-               regs->CNTR = CNTR_PDMD | CNTR_INTEN;
-               num_a2091++;
-               continue;
-
-unregister:
-               scsi_unregister(instance);
-release:
-               release_mem_region(address, 256);
+               kfree(wh->dma_bounce_buffer);
+               wh->dma_bounce_buffer = NULL;
+               wh->dma_bounce_len = 0;
        }
-
-       return num_a2091;
 }
 
 static int a2091_bus_reset(struct scsi_cmnd *cmd)
 {
+       struct Scsi_Host *instance = cmd->device->host;
+
        /* FIXME perform bus-specific reset */
 
        /* FIXME 2: kill this function, and let midlayer fall back
           to the same action, calling wd33c93_host_reset() */
 
-       spin_lock_irq(cmd->device->host->host_lock);
+       spin_lock_irq(instance->host_lock);
        wd33c93_host_reset(cmd);
-       spin_unlock_irq(cmd->device->host->host_lock);
+       spin_unlock_irq(instance->host_lock);
 
        return SUCCESS;
 }
 
-#define HOSTS_C
-
-static struct scsi_host_template driver_template = {
-       .proc_name              = "A2901",
+static struct scsi_host_template a2091_scsi_template = {
+       .module                 = THIS_MODULE,
        .name                   = "Commodore A2091/A590 SCSI",
-       .detect                 = a2091_detect,
-       .release                = a2091_release,
+       .proc_info              = wd33c93_proc_info,
+       .proc_name              = "A2901",
        .queuecommand           = wd33c93_queuecommand,
        .eh_abort_handler       = wd33c93_abort,
        .eh_bus_reset_handler   = a2091_bus_reset,
@@ -237,19 +178,103 @@ static struct scsi_host_template driver_template = {
        .use_clustering         = DISABLE_CLUSTERING
 };
 
+static int __devinit a2091_probe(struct zorro_dev *z,
+                                const struct zorro_device_id *ent)
+{
+       struct Scsi_Host *instance;
+       int error;
+       struct a2091_scsiregs *regs;
+       wd33c93_regs wdregs;
+       struct a2091_hostdata *hdata;
 
-#include "scsi_module.c"
+       if (!request_mem_region(z->resource.start, 256, "wd33c93"))
+               return -EBUSY;
 
-static int a2091_release(struct Scsi_Host *instance)
+       instance = scsi_host_alloc(&a2091_scsi_template,
+                                  sizeof(struct a2091_hostdata));
+       if (!instance) {
+               error = -ENOMEM;
+               goto fail_alloc;
+       }
+
+       instance->irq = IRQ_AMIGA_PORTS;
+       instance->unique_id = z->slotaddr;
+
+       regs = (struct a2091_scsiregs *)ZTWO_VADDR(z->resource.start);
+       regs->DAWR = DAWR_A2091;
+
+       wdregs.SASR = &regs->SASR;
+       wdregs.SCMD = &regs->SCMD;
+
+       hdata = shost_priv(instance);
+       hdata->wh.no_sync = 0xff;
+       hdata->wh.fast = 0;
+       hdata->wh.dma_mode = CTRL_DMA;
+       hdata->regs = regs;
+
+       wd33c93_init(instance, wdregs, dma_setup, dma_stop, WD33C93_FS_8_10);
+       error = request_irq(IRQ_AMIGA_PORTS, a2091_intr, IRQF_SHARED,
+                           "A2091 SCSI", instance);
+       if (error)
+               goto fail_irq;
+
+       regs->CNTR = CNTR_PDMD | CNTR_INTEN;
+
+       error = scsi_add_host(instance, NULL);
+       if (error)
+               goto fail_host;
+
+       zorro_set_drvdata(z, instance);
+
+       scsi_scan_host(instance);
+       return 0;
+
+fail_host:
+       free_irq(IRQ_AMIGA_PORTS, instance);
+fail_irq:
+       scsi_host_put(instance);
+fail_alloc:
+       release_mem_region(z->resource.start, 256);
+       return error;
+}
+
+static void __devexit a2091_remove(struct zorro_dev *z)
 {
-#ifdef MODULE
-       a2091_scsiregs *regs = (a2091_scsiregs *)(instance->base);
+       struct Scsi_Host *instance = zorro_get_drvdata(z);
+       struct a2091_hostdata *hdata = shost_priv(instance);
 
-       regs->CNTR = 0;
-       release_mem_region(ZTWO_PADDR(instance->base), 256);
+       hdata->regs->CNTR = 0;
+       scsi_remove_host(instance);
        free_irq(IRQ_AMIGA_PORTS, instance);
-#endif
-       return 1;
+       scsi_host_put(instance);
+       release_mem_region(z->resource.start, 256);
+}
+
+static struct zorro_device_id a2091_zorro_tbl[] __devinitdata = {
+       { ZORRO_PROD_CBM_A590_A2091_1 },
+       { ZORRO_PROD_CBM_A590_A2091_2 },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(zorro, a2091_zorro_tbl);
+
+static struct zorro_driver a2091_driver = {
+       .name           = "a2091",
+       .id_table       = a2091_zorro_tbl,
+       .probe          = a2091_probe,
+       .remove         = __devexit_p(a2091_remove),
+};
+
+static int __init a2091_init(void)
+{
+       return zorro_register_driver(&a2091_driver);
+}
+module_init(a2091_init);
+
+static void __exit a2091_exit(void)
+{
+       zorro_unregister_driver(&a2091_driver);
 }
+module_exit(a2091_exit);
 
+MODULE_DESCRIPTION("Commodore A2091/A590 SCSI");
 MODULE_LICENSE("GPL");
index 1c3daa1..794b8e6 100644
@@ -25,7 +25,7 @@
  */
 #define A2091_XFER_MASK                (0xff000001)
 
-typedef struct {
+struct a2091_scsiregs {
                 unsigned char  pad1[64];
        volatile unsigned short ISTR;
        volatile unsigned short CNTR;
@@ -44,7 +44,7 @@ typedef struct {
        volatile unsigned short CINT;
                 unsigned char  pad7[2];
        volatile unsigned short FLUSH;
-} a2091_scsiregs;
+};
 
 #define DAWR_A2091             (3)
 
index bc6eb69..d946802 100644
@@ -1,53 +1,52 @@
 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/platform_device.h>
 
-#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
-#include <asm/irq.h>
 
 #include "scsi.h"
-#include <scsi/scsi_host.h>
 #include "wd33c93.h"
 #include "a3000.h"
 
-#include <linux/stat.h>
-
 
-#define DMA(ptr)       ((a3000_scsiregs *)((ptr)->base))
-
-static struct Scsi_Host *a3000_host = NULL;
-
-static int a3000_release(struct Scsi_Host *instance);
+struct a3000_hostdata {
+       struct WD33C93_hostdata wh;
+       struct a3000_scsiregs *regs;
+};
 
-static irqreturn_t a3000_intr(int irq, void *dummy)
+static irqreturn_t a3000_intr(int irq, void *data)
 {
+       struct Scsi_Host *instance = data;
+       struct a3000_hostdata *hdata = shost_priv(instance);
+       unsigned int status = hdata->regs->ISTR;
        unsigned long flags;
-       unsigned int status = DMA(a3000_host)->ISTR;
 
        if (!(status & ISTR_INT_P))
                return IRQ_NONE;
        if (status & ISTR_INTS) {
-               spin_lock_irqsave(a3000_host->host_lock, flags);
-               wd33c93_intr(a3000_host);
-               spin_unlock_irqrestore(a3000_host->host_lock, flags);
+               spin_lock_irqsave(instance->host_lock, flags);
+               wd33c93_intr(instance);
+               spin_unlock_irqrestore(instance->host_lock, flags);
                return IRQ_HANDLED;
        }
-       printk("Non-serviced A3000 SCSI-interrupt? ISTR = %02x\n", status);
+       pr_warning("Non-serviced A3000 SCSI-interrupt? ISTR = %02x\n", status);
        return IRQ_NONE;
 }
 
 static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
-       struct WD33C93_hostdata *hdata = shost_priv(a3000_host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct a3000_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct a3000_scsiregs *regs = hdata->regs;
        unsigned short cntr = CNTR_PDMD | CNTR_INTEN;
        unsigned long addr = virt_to_bus(cmd->SCp.ptr);
 
@@ -58,23 +57,23 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
         * buffer
         */
        if (addr & A3000_XFER_MASK) {
-               hdata->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
-               hdata->dma_bounce_buffer = kmalloc(hdata->dma_bounce_len,
-                                                  GFP_KERNEL);
+               wh->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
+               wh->dma_bounce_buffer = kmalloc(wh->dma_bounce_len,
+                                               GFP_KERNEL);
 
                /* can't allocate memory; use PIO */
-               if (!hdata->dma_bounce_buffer) {
-                       hdata->dma_bounce_len = 0;
+               if (!wh->dma_bounce_buffer) {
+                       wh->dma_bounce_len = 0;
                        return 1;
                }
 
                if (!dir_in) {
                        /* copy to bounce buffer for a write */
-                       memcpy(hdata->dma_bounce_buffer, cmd->SCp.ptr,
+                       memcpy(wh->dma_bounce_buffer, cmd->SCp.ptr,
                               cmd->SCp.this_residual);
                }
 
-               addr = virt_to_bus(hdata->dma_bounce_buffer);
+               addr = virt_to_bus(wh->dma_bounce_buffer);
        }
 
        /* setup dma direction */
@@ -82,12 +81,12 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
                cntr |= CNTR_DDIR;
 
        /* remember direction */
-       hdata->dma_dir = dir_in;
+       wh->dma_dir = dir_in;
 
-       DMA(a3000_host)->CNTR = cntr;
+       regs->CNTR = cntr;
 
        /* setup DMA *physical* address */
-       DMA(a3000_host)->ACR = addr;
+       regs->ACR = addr;
 
        if (dir_in) {
                /* invalidate any cache */
@@ -99,7 +98,7 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 
        /* start DMA */
        mb();                   /* make sure setup is completed */
-       DMA(a3000_host)->ST_DMA = 1;
+       regs->ST_DMA = 1;
        mb();                   /* make sure DMA has started before next IO */
 
        /* return success */
@@ -109,22 +108,24 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
                     int status)
 {
-       struct WD33C93_hostdata *hdata = shost_priv(instance);
+       struct a3000_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct a3000_scsiregs *regs = hdata->regs;
 
        /* disable SCSI interrupts */
        unsigned short cntr = CNTR_PDMD;
 
-       if (!hdata->dma_dir)
+       if (!wh->dma_dir)
                cntr |= CNTR_DDIR;
 
-       DMA(instance)->CNTR = cntr;
+       regs->CNTR = cntr;
        mb();                   /* make sure CNTR is updated before next IO */
 
        /* flush if we were reading */
-       if (hdata->dma_dir) {
-               DMA(instance)->FLUSH = 1;
+       if (wh->dma_dir) {
+               regs->FLUSH = 1;
                mb();           /* don't allow prefetch */
-               while (!(DMA(instance)->ISTR & ISTR_FE_FLG))
+               while (!(regs->ISTR & ISTR_FE_FLG))
                        barrier();
                mb();           /* no IO until FLUSH is done */
        }
@@ -133,96 +134,54 @@ static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
        /* I think that this CINT is only necessary if you are
         * using the terminal count features.   HM 7 Mar 1994
         */
-       DMA(instance)->CINT = 1;
+       regs->CINT = 1;
 
        /* stop DMA */
-       DMA(instance)->SP_DMA = 1;
+       regs->SP_DMA = 1;
        mb();                   /* make sure DMA is stopped before next IO */
 
        /* restore the CONTROL bits (minus the direction flag) */
-       DMA(instance)->CNTR = CNTR_PDMD | CNTR_INTEN;
+       regs->CNTR = CNTR_PDMD | CNTR_INTEN;
        mb();                   /* make sure CNTR is updated before next IO */
 
        /* copy from a bounce buffer, if necessary */
-       if (status && hdata->dma_bounce_buffer) {
+       if (status && wh->dma_bounce_buffer) {
                if (SCpnt) {
-                       if (hdata->dma_dir && SCpnt)
-                               memcpy(SCpnt->SCp.ptr,
-                                      hdata->dma_bounce_buffer,
+                       if (wh->dma_dir)
+                               memcpy(SCpnt->SCp.ptr, wh->dma_bounce_buffer,
                                       SCpnt->SCp.this_residual);
-                       kfree(hdata->dma_bounce_buffer);
-                       hdata->dma_bounce_buffer = NULL;
-                       hdata->dma_bounce_len = 0;
+                       kfree(wh->dma_bounce_buffer);
+                       wh->dma_bounce_buffer = NULL;
+                       wh->dma_bounce_len = 0;
                } else {
-                       kfree(hdata->dma_bounce_buffer);
-                       hdata->dma_bounce_buffer = NULL;
-                       hdata->dma_bounce_len = 0;
+                       kfree(wh->dma_bounce_buffer);
+                       wh->dma_bounce_buffer = NULL;
+                       wh->dma_bounce_len = 0;
                }
        }
 }
 
-static int __init a3000_detect(struct scsi_host_template *tpnt)
-{
-       wd33c93_regs regs;
-       struct WD33C93_hostdata *hdata;
-
-       if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(A3000_SCSI))
-               return 0;
-       if (!request_mem_region(0xDD0000, 256, "wd33c93"))
-               return 0;
-
-       tpnt->proc_name = "A3000";
-       tpnt->proc_info = &wd33c93_proc_info;
-
-       a3000_host = scsi_register(tpnt, sizeof(struct WD33C93_hostdata));
-       if (a3000_host == NULL)
-               goto fail_register;
-
-       a3000_host->base = ZTWO_VADDR(0xDD0000);
-       a3000_host->irq = IRQ_AMIGA_PORTS;
-       DMA(a3000_host)->DAWR = DAWR_A3000;
-       regs.SASR = &(DMA(a3000_host)->SASR);
-       regs.SCMD = &(DMA(a3000_host)->SCMD);
-       hdata = shost_priv(a3000_host);
-       hdata->no_sync = 0xff;
-       hdata->fast = 0;
-       hdata->dma_mode = CTRL_DMA;
-       wd33c93_init(a3000_host, regs, dma_setup, dma_stop, WD33C93_FS_12_15);
-       if (request_irq(IRQ_AMIGA_PORTS, a3000_intr, IRQF_SHARED, "A3000 SCSI",
-                       a3000_intr))
-               goto fail_irq;
-       DMA(a3000_host)->CNTR = CNTR_PDMD | CNTR_INTEN;
-
-       return 1;
-
-fail_irq:
-       scsi_unregister(a3000_host);
-fail_register:
-       release_mem_region(0xDD0000, 256);
-       return 0;
-}
-
 static int a3000_bus_reset(struct scsi_cmnd *cmd)
 {
+       struct Scsi_Host *instance = cmd->device->host;
+
        /* FIXME perform bus-specific reset */
 
        /* FIXME 2: kill this entire function, which should
           cause mid-layer to call wd33c93_host_reset anyway? */
 
-       spin_lock_irq(cmd->device->host->host_lock);
+       spin_lock_irq(instance->host_lock);
        wd33c93_host_reset(cmd);
-       spin_unlock_irq(cmd->device->host->host_lock);
+       spin_unlock_irq(instance->host_lock);
 
        return SUCCESS;
 }
 
-#define HOSTS_C
-
-static struct scsi_host_template driver_template = {
-       .proc_name              = "A3000",
+static struct scsi_host_template amiga_a3000_scsi_template = {
+       .module                 = THIS_MODULE,
        .name                   = "Amiga 3000 built-in SCSI",
-       .detect                 = a3000_detect,
-       .release                = a3000_release,
+       .proc_info              = wd33c93_proc_info,
+       .proc_name              = "A3000",
        .queuecommand           = wd33c93_queuecommand,
        .eh_abort_handler       = wd33c93_abort,
        .eh_bus_reset_handler   = a3000_bus_reset,
@@ -234,15 +193,104 @@ static struct scsi_host_template driver_template = {
        .use_clustering         = ENABLE_CLUSTERING
 };
 
+static int __init amiga_a3000_scsi_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       struct Scsi_Host *instance;
+       int error;
+       struct a3000_scsiregs *regs;
+       wd33c93_regs wdregs;
+       struct a3000_hostdata *hdata;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       if (!request_mem_region(res->start, resource_size(res), "wd33c93"))
+               return -EBUSY;
+
+       instance = scsi_host_alloc(&amiga_a3000_scsi_template,
+                                  sizeof(struct a3000_hostdata));
+       if (!instance) {
+               error = -ENOMEM;
+               goto fail_alloc;
+       }
+
+       instance->irq = IRQ_AMIGA_PORTS;
 
-#include "scsi_module.c"
+       regs = (struct a3000_scsiregs *)ZTWO_VADDR(res->start);
+       regs->DAWR = DAWR_A3000;
+
+       wdregs.SASR = &regs->SASR;
+       wdregs.SCMD = &regs->SCMD;
+
+       hdata = shost_priv(instance);
+       hdata->wh.no_sync = 0xff;
+       hdata->wh.fast = 0;
+       hdata->wh.dma_mode = CTRL_DMA;
+       hdata->regs = regs;
+
+       wd33c93_init(instance, wdregs, dma_setup, dma_stop, WD33C93_FS_12_15);
+       error = request_irq(IRQ_AMIGA_PORTS, a3000_intr, IRQF_SHARED,
+                           "A3000 SCSI", instance);
+       if (error)
+               goto fail_irq;
+
+       regs->CNTR = CNTR_PDMD | CNTR_INTEN;
+
+       error = scsi_add_host(instance, NULL);
+       if (error)
+               goto fail_host;
+
+       platform_set_drvdata(pdev, instance);
+
+       scsi_scan_host(instance);
+       return 0;
+
+fail_host:
+       free_irq(IRQ_AMIGA_PORTS, instance);
+fail_irq:
+       scsi_host_put(instance);
+fail_alloc:
+       release_mem_region(res->start, resource_size(res));
+       return error;
+}
+
+static int __exit amiga_a3000_scsi_remove(struct platform_device *pdev)
+{
+       struct Scsi_Host *instance = platform_get_drvdata(pdev);
+       struct a3000_hostdata *hdata = shost_priv(instance);
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       hdata->regs->CNTR = 0;
+       scsi_remove_host(instance);
+       free_irq(IRQ_AMIGA_PORTS, instance);
+       scsi_host_put(instance);
+       release_mem_region(res->start, resource_size(res));
+       return 0;
+}
+
+static struct platform_driver amiga_a3000_scsi_driver = {
+       .remove = __exit_p(amiga_a3000_scsi_remove),
+       .driver   = {
+               .name   = "amiga-a3000-scsi",
+               .owner  = THIS_MODULE,
+       },
+};
+
+static int __init amiga_a3000_scsi_init(void)
+{
+       return platform_driver_probe(&amiga_a3000_scsi_driver,
+                                    amiga_a3000_scsi_probe);
+}
+module_init(amiga_a3000_scsi_init);
 
-static int a3000_release(struct Scsi_Host *instance)
+static void __exit amiga_a3000_scsi_exit(void)
 {
-       DMA(instance)->CNTR = 0;
-       release_mem_region(0xDD0000, 256);
-       free_irq(IRQ_AMIGA_PORTS, a3000_intr);
-       return 1;
+       platform_driver_unregister(&amiga_a3000_scsi_driver);
 }
+module_exit(amiga_a3000_scsi_exit);
 
+MODULE_DESCRIPTION("Amiga 3000 built-in SCSI");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:amiga-a3000-scsi");
index 684813e..49db4a3 100644
@@ -25,7 +25,7 @@
  */
 #define A3000_XFER_MASK                (0x00000003)
 
-typedef struct {
+struct a3000_scsiregs {
                 unsigned char  pad1[2];
        volatile unsigned short DAWR;
        volatile unsigned int   WTC;
@@ -46,7 +46,7 @@ typedef struct {
        volatile unsigned char  SASR;
                 unsigned char  pad9;
        volatile unsigned char  SCMD;
-} a3000_scsiregs;
+};
 
 #define DAWR_A3000             (3)
 
index 11ae6be..23c76f4 100644
 
 #include "53c700.h"
 
-MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
-MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver");
-MODULE_LICENSE("GPL");
-
 
 static struct scsi_host_template a4000t_scsi_driver_template = {
        .name           = "A4000T builtin SCSI",
@@ -32,30 +28,35 @@ static struct scsi_host_template a4000t_scsi_driver_template = {
        .module         = THIS_MODULE,
 };
 
-static struct platform_device *a4000t_scsi_device;
 
-#define A4000T_SCSI_ADDR 0xdd0040
+#define A4000T_SCSI_OFFSET     0x40
 
-static int __devinit a4000t_probe(struct platform_device *dev)
+static int __init amiga_a4000t_scsi_probe(struct platform_device *pdev)
 {
-       struct Scsi_Host *host;
+       struct resource *res;
+       phys_addr_t scsi_addr;
        struct NCR_700_Host_Parameters *hostdata;
+       struct Scsi_Host *host;
 
-       if (!(MACH_IS_AMIGA && AMIGAHW_PRESENT(A4000_SCSI)))
-               goto out;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
 
-       if (!request_mem_region(A4000T_SCSI_ADDR, 0x1000,
+       if (!request_mem_region(res->start, resource_size(res),
                                "A4000T builtin SCSI"))
-               goto out;
+               return -EBUSY;
 
-       hostdata = kzalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
+       hostdata = kzalloc(sizeof(struct NCR_700_Host_Parameters),
+                          GFP_KERNEL);
        if (!hostdata) {
-               printk(KERN_ERR "a4000t-scsi: Failed to allocate host data\n");
+               dev_err(&pdev->dev, "Failed to allocate host data\n");
                goto out_release;
        }
 
+       scsi_addr = res->start + A4000T_SCSI_OFFSET;
+
        /* Fill in the required pieces of hostdata */
-       hostdata->base = (void __iomem *)ZTWO_VADDR(A4000T_SCSI_ADDR);
+       hostdata->base = (void __iomem *)ZTWO_VADDR(scsi_addr);
        hostdata->clock = 50;
        hostdata->chip710 = 1;
        hostdata->dmode_extra = DMODE_FC2;
@@ -63,26 +64,25 @@ static int __devinit a4000t_probe(struct platform_device *dev)
 
        /* and register the chip */
        host = NCR_700_detect(&a4000t_scsi_driver_template, hostdata,
-                             &dev->dev);
+                             &pdev->dev);
        if (!host) {
-               printk(KERN_ERR "a4000t-scsi: No host detected; "
-                               "board configuration problem?\n");
+               dev_err(&pdev->dev,
+                       "No host detected; board configuration problem?\n");
                goto out_free;
        }
 
        host->this_id = 7;
-       host->base = A4000T_SCSI_ADDR;
+       host->base = scsi_addr;
        host->irq = IRQ_AMIGA_PORTS;
 
        if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "a4000t-scsi",
                        host)) {
-               printk(KERN_ERR "a4000t-scsi: request_irq failed\n");
+               dev_err(&pdev->dev, "request_irq failed\n");
                goto out_put_host;
        }
 
-       platform_set_drvdata(dev, host);
+       platform_set_drvdata(pdev, host);
        scsi_scan_host(host);
-
        return 0;
 
  out_put_host:
@@ -90,58 +90,49 @@ static int __devinit a4000t_probe(struct platform_device *dev)
  out_free:
        kfree(hostdata);
  out_release:
-       release_mem_region(A4000T_SCSI_ADDR, 0x1000);
- out:
+       release_mem_region(res->start, resource_size(res));
        return -ENODEV;
 }
 
-static __devexit int a4000t_device_remove(struct platform_device *dev)
+static int __exit amiga_a4000t_scsi_remove(struct platform_device *pdev)
 {
-       struct Scsi_Host *host = platform_get_drvdata(dev);
+       struct Scsi_Host *host = platform_get_drvdata(pdev);
        struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
        scsi_remove_host(host);
-
        NCR_700_release(host);
        kfree(hostdata);
        free_irq(host->irq, host);
-       release_mem_region(A4000T_SCSI_ADDR, 0x1000);
-
+       release_mem_region(res->start, resource_size(res));
        return 0;
 }
 
-static struct platform_driver a4000t_scsi_driver = {
-       .driver = {
-               .name           = "a4000t-scsi",
-               .owner          = THIS_MODULE,
+static struct platform_driver amiga_a4000t_scsi_driver = {
+       .remove = __exit_p(amiga_a4000t_scsi_remove),
+       .driver   = {
+               .name   = "amiga-a4000t-scsi",
+               .owner  = THIS_MODULE,
        },
-       .probe          = a4000t_probe,
-       .remove         = __devexit_p(a4000t_device_remove),
 };
 
-static int __init a4000t_scsi_init(void)
+static int __init amiga_a4000t_scsi_init(void)
 {
-       int err;
-
-       err = platform_driver_register(&a4000t_scsi_driver);
-       if (err)
-               return err;
-
-       a4000t_scsi_device = platform_device_register_simple("a4000t-scsi",
-                       -1, NULL, 0);
-       if (IS_ERR(a4000t_scsi_device)) {
-               platform_driver_unregister(&a4000t_scsi_driver);
-               return PTR_ERR(a4000t_scsi_device);
-       }
-
-       return err;
+       return platform_driver_probe(&amiga_a4000t_scsi_driver,
+                                    amiga_a4000t_scsi_probe);
 }
 
-static void __exit a4000t_scsi_exit(void)
+module_init(amiga_a4000t_scsi_init);
+
+static void __exit amiga_a4000t_scsi_exit(void)
 {
-       platform_device_unregister(a4000t_scsi_device);
-       platform_driver_unregister(&a4000t_scsi_driver);
+       platform_driver_unregister(&amiga_a4000t_scsi_driver);
 }
 
-module_init(a4000t_scsi_init);
-module_exit(a4000t_scsi_exit);
+module_exit(amiga_a4000t_scsi_exit);
+
+MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / "
+             "Kars de Jong <jongk@linux-m68k.org>");
+MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:amiga-a4000t-scsi");
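
Compared with the old a4000t code, the probe above no longer hard-codes 0xdd0040: the board code publishes the region as an IORESOURCE_MEM resource and the driver derives the chip address as res->start + A4000T_SCSI_OFFSET. A sketch of that resource-driven shape, with placeholder "example" names (the resource API calls are the real ones):

#include <linux/platform_device.h>
#include <linux/ioport.h>
#include <linux/types.h>

#define EXAMPLE_CHIP_OFFSET	0x40	/* like A4000T_SCSI_OFFSET */

static int __init example_probe(struct platform_device *pdev)
{
	struct resource *res;
	phys_addr_t chip_addr;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;		/* board code gave us no region */

	if (!request_mem_region(res->start, resource_size(res), "example"))
		return -EBUSY;		/* region already claimed */

	/* the chip address is derived, never hard-coded */
	chip_addr = res->start + EXAMPLE_CHIP_OFFSET;

	(void)chip_addr;		/* map and use it here */
	return 0;
}
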
index 9c0c911..1a5bf57 100644
@@ -655,9 +655,9 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg)
                                /* Does this really need to be GFP_DMA? */
                                p = kmalloc(usg->sg[i].count,GFP_KERNEL|__GFP_DMA);
                                if(!p) {
-                                       kfree (usg);
-                                       dprintk((KERN_DEBUG"aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
+                                       dprintk((KERN_DEBUG "aacraid: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
                                          usg->sg[i].count,i,usg->count));
+                                       kfree(usg);
                                        rcode = -ENOMEM;
                                        goto cleanup;
                                }
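
The aacraid hunk above is a use-after-free fix: the old error path freed usg and then dereferenced usg->sg[i].count and usg->count inside the debug printout. Reordering puts the message before kfree(). A userspace reduction of the bug pattern, with made-up names:

#include <stdio.h>
#include <stdlib.h>

struct sg_list {
	int count;
};

int main(void)
{
	struct sg_list *usg = malloc(sizeof(*usg));

	if (!usg)
		return 1;
	usg->count = 4;

	/* buggy order (what the old code did):
	 *     free(usg);
	 *     printf("%d\n", usg->count);   <-- use after free
	 */

	/* fixed order: consume the data, then free it */
	printf("count = %d\n", usg->count);
	free(usg);
	return 0;
}
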
index ab646e5..ce5371b 100644
@@ -48,7 +48,7 @@ struct device_attribute;
 /*The limit of outstanding scsi command that firmware can handle*/
 #define ARCMSR_MAX_OUTSTANDING_CMD                                             256
 #define ARCMSR_MAX_FREECCB_NUM                                                 320
-#define ARCMSR_DRIVER_VERSION               "Driver Version 1.20.00.15 2008/02/27"
+#define ARCMSR_DRIVER_VERSION               "Driver Version 1.20.00.15 2008/11/03"
 #define ARCMSR_SCSI_INITIATOR_ID                                               255
 #define ARCMSR_MAX_XFER_SECTORS                                                        512
 #define ARCMSR_MAX_XFER_SECTORS_B                                              4096
@@ -110,6 +110,8 @@ struct CMD_MESSAGE_FIELD
 #define FUNCTION_SAY_HELLO                     0x0807
 #define FUNCTION_SAY_GOODBYE                   0x0808
 #define FUNCTION_FLUSH_ADAPTER_CACHE           0x0809
+#define FUNCTION_GET_FIRMWARE_STATUS                   0x080A
+#define FUNCTION_HARDWARE_RESET                        0x080B
 /* ARECA IO CONTROL CODE*/
 #define ARCMSR_MESSAGE_READ_RQBUFFER       \
        ARECA_SATA_RAID | FUNCTION_READ_RQBUFFER
@@ -133,6 +135,7 @@ struct CMD_MESSAGE_FIELD
 #define ARCMSR_MESSAGE_RETURNCODE_OK              0x00000001
 #define ARCMSR_MESSAGE_RETURNCODE_ERROR           0x00000006
 #define ARCMSR_MESSAGE_RETURNCODE_3F              0x0000003F
+#define ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON  0x00000088
 /*
 *************************************************************
 **   structure for holding DMA address data
@@ -341,13 +344,13 @@ struct MessageUnit_B
        uint32_t        done_qbuffer[ARCMSR_MAX_HBB_POSTQUEUE];
        uint32_t        postq_index;
        uint32_t        doneq_index;
-       void            __iomem *drv2iop_doorbell_reg;
-       void            __iomem *drv2iop_doorbell_mask_reg;
-       void            __iomem *iop2drv_doorbell_reg;
-       void            __iomem *iop2drv_doorbell_mask_reg;
-       void            __iomem *msgcode_rwbuffer_reg;
-       void            __iomem *ioctl_wbuffer_reg;
-       void            __iomem *ioctl_rbuffer_reg;
+       uint32_t                __iomem *drv2iop_doorbell_reg;
+       uint32_t                __iomem *drv2iop_doorbell_mask_reg;
+       uint32_t                __iomem *iop2drv_doorbell_reg;
+       uint32_t                __iomem *iop2drv_doorbell_mask_reg;
+       uint32_t                __iomem *msgcode_rwbuffer_reg;
+       uint32_t                __iomem *ioctl_wbuffer_reg;
+       uint32_t                __iomem *ioctl_rbuffer_reg;
 };
 
 /*
@@ -375,6 +378,7 @@ struct AdapterControlBlock
        /* message unit ATU inbound base address0 */
 
        uint32_t                        acb_flags;
+       uint8_t                                 adapter_index;
        #define ACB_F_SCSISTOPADAPTER           0x0001
        #define ACB_F_MSG_STOP_BGRB             0x0002
        /* stop RAID background rebuild */
@@ -390,7 +394,7 @@ struct AdapterControlBlock
        #define ACB_F_BUS_RESET                 0x0080
        #define ACB_F_IOP_INITED                0x0100
        /* iop init */
-
+       #define ACB_F_FIRMWARE_TRAP                     0x0400
        struct CommandControlBlock *                    pccb_pool[ARCMSR_MAX_FREECCB_NUM];
        /* used for memory free */
        struct list_head                ccb_free_list;
@@ -423,12 +427,19 @@ struct AdapterControlBlock
 #define ARECA_RAID_GOOD               0xaa
        uint32_t                        num_resets;
        uint32_t                        num_aborts;
+       uint32_t                        signature;
        uint32_t                        firm_request_len;
        uint32_t                        firm_numbers_queue;
        uint32_t                        firm_sdram_size;
        uint32_t                        firm_hd_channels;
        char                            firm_model[12];
        char                            firm_version[20];
+       char                    device_map[20];                 /*21,84-99*/
+       struct work_struct              arcmsr_do_message_isr_bh;
+       struct timer_list               eternal_timer;
+       unsigned short          fw_state;
+       atomic_t                        rq_map_token;
+       int                     ante_token_value;
 };/* HW_DEVICE_EXTENSION */
 /*
 *******************************************************************************
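
Retyping the MessageUnit_B pointers from void __iomem * to uint32_t __iomem * gives them real element sizes, so indexing such as msgcode_rwbuffer_reg[21] advances in 32-bit registers; the device-map reader added later in this series relies on that, and it is also why the iounmap() calls now need a (u8 *) cast to get back to byte offsets. A small sketch of the arithmetic, with a hypothetical rwbuffer base:

#include <linux/io.h>
#include <linux/types.h>

static void example(void __iomem *base)
{
	u32 __iomem *rwbuffer = base;
	u8 __iomem *devicemap;

	/* &rwbuffer[21] advances 21 * sizeof(u32) = 84 bytes, which is
	 * where the "21,84-99" comments say the device map lives */
	devicemap = (u8 __iomem *)&rwbuffer[21];

	(void)readb(devicemap);
}
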
index a4e04c5..07fdfe5 100644
@@ -192,6 +192,7 @@ static struct bin_attribute arcmsr_sysfs_message_read_attr = {
        .attr = {
                .name = "mu_read",
                .mode = S_IRUSR ,
+               .owner = THIS_MODULE,
        },
        .size = 1032,
        .read = arcmsr_sysfs_iop_message_read,
@@ -201,6 +202,7 @@ static struct bin_attribute arcmsr_sysfs_message_write_attr = {
        .attr = {
                .name = "mu_write",
                .mode = S_IWUSR,
+               .owner = THIS_MODULE,
        },
        .size = 1032,
        .write = arcmsr_sysfs_iop_message_write,
@@ -210,6 +212,7 @@ static struct bin_attribute arcmsr_sysfs_message_clear_attr = {
        .attr = {
                .name = "mu_clear",
                .mode = S_IWUSR,
+               .owner = THIS_MODULE,
        },
        .size = 1,
        .write = arcmsr_sysfs_iop_message_clear,
index ffbe219..ffa5479 100644
 #include <scsi/scsicam.h>
 #include "arcmsr.h"
 
+#ifdef CONFIG_SCSI_ARCMSR_RESET
+static int sleeptime = 20;
+static int retrycount = 12;
+module_param(sleeptime, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(sleeptime, "Seconds to wait for the firmware to become ready after a bus reset");
+module_param(retrycount, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(retrycount, "How many times to poll for firmware readiness after a bus reset");
+#endif
 MODULE_AUTHOR("Erich Chen <support@areca.com.tw>");
-MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter");
+MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID Host Bus Adapter");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(ARCMSR_DRIVER_VERSION);
 
@@ -96,6 +104,13 @@ static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb);
 static void arcmsr_stop_adapter_bgrb(struct AdapterControlBlock *acb);
 static void arcmsr_flush_hba_cache(struct AdapterControlBlock *acb);
 static void arcmsr_flush_hbb_cache(struct AdapterControlBlock *acb);
+static void arcmsr_request_device_map(unsigned long pacb);
+static void arcmsr_request_hba_device_map(struct AdapterControlBlock *acb);
+static void arcmsr_request_hbb_device_map(struct AdapterControlBlock *acb);
+static void arcmsr_message_isr_bh_fn(struct work_struct *work);
+static void *arcmsr_get_firmware_spec(struct AdapterControlBlock *acb, int mode);
+static void arcmsr_start_adapter_bgrb(struct AdapterControlBlock *acb);
+
 static const char *arcmsr_info(struct Scsi_Host *);
 static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
 static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev,
@@ -112,7 +127,7 @@ static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev,
 
 static struct scsi_host_template arcmsr_scsi_host_template = {
        .module                 = THIS_MODULE,
-       .name                   = "ARCMSR ARECA SATA/SAS RAID HOST Adapter"
+       .name                   = "ARCMSR ARECA SATA/SAS RAID Host Bus Adapter"
                                                        ARCMSR_DRIVER_VERSION,
        .info                   = arcmsr_info,
        .queuecommand           = arcmsr_queue_command,
@@ -128,16 +143,6 @@ static struct scsi_host_template arcmsr_scsi_host_template = {
        .use_clustering         = ENABLE_CLUSTERING,
        .shost_attrs            = arcmsr_host_attrs,
 };
-#ifdef CONFIG_SCSI_ARCMSR_AER
-static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev);
-static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
-                                               pci_channel_state_t state);
-
-static struct pci_error_handlers arcmsr_pci_error_handlers = {
-       .error_detected         = arcmsr_pci_error_detected,
-       .slot_reset             = arcmsr_pci_slot_reset,
-};
-#endif
 static struct pci_device_id arcmsr_device_id_table[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
        {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1120)},
@@ -166,9 +171,6 @@ static struct pci_driver arcmsr_pci_driver = {
        .probe                  = arcmsr_probe,
        .remove                 = arcmsr_remove,
        .shutdown               = arcmsr_shutdown,
-       #ifdef CONFIG_SCSI_ARCMSR_AER
-       .err_handler            = &arcmsr_pci_error_handlers,
-       #endif
 };
 
 static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
@@ -236,10 +238,9 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
                void *dma_coherent;
                dma_addr_t dma_coherent_handle, dma_addr;
                struct CommandControlBlock *ccb_tmp;
-               uint32_t intmask_org;
                int i, j;
 
-               acb->pmuA = pci_ioremap_bar(pdev, 0);
+               acb->pmuA = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
                if (!acb->pmuA) {
                        printk(KERN_NOTICE "arcmsr%d: memory mapping region fail \n",
                                                        acb->host->host_no);
@@ -281,12 +282,6 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
                for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
                        for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
                                acb->devstate[i][j] = ARECA_RAID_GONE;
-
-               /*
-               ** here we need to tell iop 331 our ccb_tmp.HighPart
-               ** if ccb_tmp.HighPart is not zero
-               */
-               intmask_org = arcmsr_disable_outbound_ints(acb);
                }
                break;
 
@@ -297,7 +292,6 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
                void __iomem *mem_base0, *mem_base1;
                void *dma_coherent;
                dma_addr_t dma_coherent_handle, dma_addr;
-               uint32_t intmask_org;
                struct CommandControlBlock *ccb_tmp;
                int i, j;
 
@@ -333,11 +327,13 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
                reg = (struct MessageUnit_B *)(dma_coherent +
                ARCMSR_MAX_FREECCB_NUM * sizeof(struct CommandControlBlock));
                acb->pmuB = reg;
-               mem_base0 = pci_ioremap_bar(pdev, 0);
+               mem_base0 = ioremap(pci_resource_start(pdev, 0),
+                                       pci_resource_len(pdev, 0));
                if (!mem_base0)
                        goto out;
 
-               mem_base1 = pci_ioremap_bar(pdev, 2);
+               mem_base1 = ioremap(pci_resource_start(pdev, 2),
+                                       pci_resource_len(pdev, 2));
                if (!mem_base1) {
                        iounmap(mem_base0);
                        goto out;
@@ -357,12 +353,6 @@ static int arcmsr_alloc_ccb_pool(struct AdapterControlBlock *acb)
                for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
                        for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
                                acb->devstate[i][j] = ARECA_RAID_GOOD;
-
-               /*
-               ** here we need to tell iop 331 our ccb_tmp.HighPart
-               ** if ccb_tmp.HighPart is not zero
-               */
-               intmask_org = arcmsr_disable_outbound_ints(acb);
                }
                break;
        }
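
These alloc_ccb_pool hunks open-code what pci_ioremap_bar() did; for a memory BAR the two forms map the same range (pci_ioremap_bar() additionally checks that the BAR is IORESOURCE_MEM before mapping). A sketch of the equivalence, as a hypothetical map_bar() helper:

#include <linux/pci.h>
#include <linux/io.h>

/* Hypothetical helper: both bodies map the same range for a MEM BAR. */
static void __iomem *map_bar(struct pci_dev *pdev, int bar)
{
	return ioremap(pci_resource_start(pdev, bar),
		       pci_resource_len(pdev, bar));
	/* equivalently: return pci_ioremap_bar(pdev, bar); */
}
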
@@ -374,6 +364,88 @@ out:
                sizeof(struct MessageUnit_B)), acb->dma_coherent, acb->dma_coherent_handle);
        return -ENOMEM;
 }
+static void arcmsr_message_isr_bh_fn(struct work_struct *work)
+{
+       struct AdapterControlBlock *acb = container_of(work, struct AdapterControlBlock, arcmsr_do_message_isr_bh);
+
+       switch (acb->adapter_type) {
+               case ACB_ADAPTER_TYPE_A: {
+
+                       struct MessageUnit_A __iomem *reg  = acb->pmuA;
+                       char *acb_dev_map = (char *)acb->device_map;
+                       uint32_t __iomem *signature = (uint32_t __iomem *) (&reg->message_rwbuffer[0]);
+                       char __iomem *devicemap = (char __iomem *) (&reg->message_rwbuffer[21]);
+                       int target, lun;
+                       struct scsi_device *psdev;
+                       char diff;
+
+                       atomic_inc(&acb->rq_map_token);
+                       if (readl(signature) == ARCMSR_SIGNATURE_GET_CONFIG) {
+                               for (target = 0; target < ARCMSR_MAX_TARGETID - 1; target++) {
+                                       diff = (*acb_dev_map)^readb(devicemap);
+                                       if (diff != 0) {
+                                               char temp;
+                                               *acb_dev_map = readb(devicemap);
+                                               temp = *acb_dev_map;
+                                               for (lun = 0; lun < ARCMSR_MAX_TARGETLUN; lun++) {
+                                                       if ((temp & 0x01) == 1 && (diff & 0x01) == 1) {
+                                                               scsi_add_device(acb->host, 0, target, lun);
+                                                       } else if ((temp & 0x01) == 0 && (diff & 0x01) == 1) {
+                                                               psdev = scsi_device_lookup(acb->host, 0, target, lun);
+                                                               if (psdev != NULL) {
+                                                                       scsi_remove_device(psdev);
+                                                                       scsi_device_put(psdev);
+                                                               }
+                                                       }
+                                                       temp >>= 1;
+                                                       diff >>= 1;
+                                               }
+                                       }
+                                       devicemap++;
+                                       acb_dev_map++;
+                               }
+                       }
+                       break;
+               }
+
+               case ACB_ADAPTER_TYPE_B: {
+                       struct MessageUnit_B *reg  = acb->pmuB;
+                       char *acb_dev_map = (char *)acb->device_map;
+                       uint32_t __iomem *signature = (uint32_t __iomem *)(&reg->msgcode_rwbuffer_reg[0]);
+                       char __iomem *devicemap = (char __iomem *)(&reg->msgcode_rwbuffer_reg[21]);
+                       int target, lun;
+                       struct scsi_device *psdev;
+                       char diff;
+
+                       atomic_inc(&acb->rq_map_token);
+                       if (readl(signature) == ARCMSR_SIGNATURE_GET_CONFIG) {
+                               for (target = 0; target < ARCMSR_MAX_TARGETID - 1; target++) {
+                                       diff = (*acb_dev_map)^readb(devicemap);
+                                       if (diff != 0) {
+                                               char temp;
+                                               *acb_dev_map = readb(devicemap);
+                                               temp = *acb_dev_map;
+                                               for (lun = 0; lun < ARCMSR_MAX_TARGETLUN; lun++) {
+                                                       if ((temp & 0x01) == 1 && (diff & 0x01) == 1) {
+                                                               scsi_add_device(acb->host, 0, target, lun);
+                                                       } else if ((temp & 0x01) == 0 && (diff & 0x01) == 1) {
+                                                               psdev = scsi_device_lookup(acb->host, 0, target, lun);
+                                                               if (psdev != NULL) {
+                                                                       scsi_remove_device(psdev);
+                                                                       scsi_device_put(psdev);
+                                                               }
+                                                       }
+                                                       temp >>= 1;
+                                                       diff >>= 1;
+                                               }
+                                       }
+                                       devicemap++;
+                                       acb_dev_map++;
+                               }
+                       }
+               }
+       }
+}
 
 static int arcmsr_probe(struct pci_dev *pdev,
        const struct pci_device_id *id)
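
The bottom half added above walks two parallel device maps, one cached in the ACB and one freshly read from the adapter's message buffer. Each byte is a per-target bitmap of LUNs; XORing old against new isolates the LUNs that changed, and the new bit decides between scsi_add_device() and scsi_remove_device(). A userspace reduction of that diff loop, with made-up example values:

#include <stdio.h>

int main(void)
{
	unsigned char old_map = 0x05;	/* LUNs 0 and 2 present before */
	unsigned char new_map = 0x03;	/* LUN 1 appeared, LUN 2 vanished */
	unsigned char diff = old_map ^ new_map;
	unsigned char temp = new_map;
	int lun;

	for (lun = 0; lun < 8; lun++, diff >>= 1, temp >>= 1) {
		if (!(diff & 0x01))
			continue;	/* this LUN did not change */
		if (temp & 0x01)
			printf("attach LUN %d\n", lun);	/* scsi_add_device() */
		else
			printf("detach LUN %d\n", lun);	/* scsi_remove_device() */
	}
	return 0;
}
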
@@ -432,17 +504,17 @@ static int arcmsr_probe(struct pci_dev *pdev,
                           ACB_F_MESSAGE_WQBUFFER_READED);
        acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
        INIT_LIST_HEAD(&acb->ccb_free_list);
-
+       INIT_WORK(&acb->arcmsr_do_message_isr_bh, arcmsr_message_isr_bh_fn);
        error = arcmsr_alloc_ccb_pool(acb);
        if (error)
                goto out_release_regions;
 
+       arcmsr_iop_init(acb);
        error = request_irq(pdev->irq, arcmsr_do_interrupt,
                            IRQF_SHARED, "arcmsr", acb);
        if (error)
                goto out_free_ccb_pool;
 
-       arcmsr_iop_init(acb);
        pci_set_drvdata(pdev, host);
        if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
                host->max_sectors= ARCMSR_MAX_XFER_SECTORS_B;
@@ -459,6 +531,14 @@ static int arcmsr_probe(struct pci_dev *pdev,
        #ifdef CONFIG_SCSI_ARCMSR_AER
        pci_enable_pcie_error_reporting(pdev);
        #endif
+       atomic_set(&acb->rq_map_token, 16);
+       acb->fw_state = true;
+       init_timer(&acb->eternal_timer);
+       acb->eternal_timer.expires = jiffies + msecs_to_jiffies(10*HZ);
+       acb->eternal_timer.data = (unsigned long) acb;
+       acb->eternal_timer.function = &arcmsr_request_device_map;
+       add_timer(&acb->eternal_timer);
+
        return 0;
  out_free_sysfs:
  out_free_irq:
@@ -518,40 +598,48 @@ static uint8_t arcmsr_hbb_wait_msgint_ready(struct AdapterControlBlock *acb)
        return 0xff;
 }
 
-static void arcmsr_abort_hba_allcmd(struct AdapterControlBlock *acb)
+static uint8_t arcmsr_abort_hba_allcmd(struct AdapterControlBlock *acb)
 {
        struct MessageUnit_A __iomem *reg = acb->pmuA;
 
        writel(ARCMSR_INBOUND_MESG0_ABORT_CMD, &reg->inbound_msgaddr0);
-       if (arcmsr_hba_wait_msgint_ready(acb))
+       if (arcmsr_hba_wait_msgint_ready(acb)) {
                printk(KERN_NOTICE
                        "arcmsr%d: wait 'abort all outstanding command' timeout \n"
                        , acb->host->host_no);
+               return 0xff;
+       }
+       return 0x00;
 }
 
-static void arcmsr_abort_hbb_allcmd(struct AdapterControlBlock *acb)
+static uint8_t arcmsr_abort_hbb_allcmd(struct AdapterControlBlock *acb)
 {
        struct MessageUnit_B *reg = acb->pmuB;
 
        writel(ARCMSR_MESSAGE_ABORT_CMD, reg->drv2iop_doorbell_reg);
-       if (arcmsr_hbb_wait_msgint_ready(acb))
+       if (arcmsr_hbb_wait_msgint_ready(acb)) {
                printk(KERN_NOTICE
                        "arcmsr%d: wait 'abort all outstanding command' timeout \n"
                        , acb->host->host_no);
+               return 0xff;
+       }
+       return 0x00;
 }
 
-static void arcmsr_abort_allcmd(struct AdapterControlBlock *acb)
+static uint8_t arcmsr_abort_allcmd(struct AdapterControlBlock *acb)
 {
+       uint8_t rtnval = 0;
        switch (acb->adapter_type) {
        case ACB_ADAPTER_TYPE_A: {
-               arcmsr_abort_hba_allcmd(acb);
+               rtnval = arcmsr_abort_hba_allcmd(acb);
                }
                break;
 
        case ACB_ADAPTER_TYPE_B: {
-               arcmsr_abort_hbb_allcmd(acb);
+               rtnval = arcmsr_abort_hbb_allcmd(acb);
                }
        }
+       return rtnval;
 }
 
 static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb)
@@ -649,8 +737,7 @@ static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb)
 
        case ACB_ADAPTER_TYPE_A : {
                struct MessageUnit_A __iomem *reg = acb->pmuA;
-               orig_mask = readl(&reg->outbound_intmask)|\
-                               ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE;
+               orig_mask = readl(&reg->outbound_intmask);
                writel(orig_mask|ARCMSR_MU_OUTBOUND_ALL_INTMASKENABLE, \
                                                &reg->outbound_intmask);
                }
@@ -658,8 +745,7 @@ static u32 arcmsr_disable_outbound_ints(struct AdapterControlBlock *acb)
 
        case ACB_ADAPTER_TYPE_B : {
                struct MessageUnit_B *reg = acb->pmuB;
-               orig_mask = readl(reg->iop2drv_doorbell_mask_reg) & \
-                                       (~ARCMSR_IOP2DRV_MESSAGE_CMD_DONE);
+               orig_mask = readl(reg->iop2drv_doorbell_mask_reg);
                writel(0, reg->iop2drv_doorbell_mask_reg);
                }
                break;
@@ -795,12 +881,13 @@ static void arcmsr_remove(struct pci_dev *pdev)
        struct AdapterControlBlock *acb =
                (struct AdapterControlBlock *) host->hostdata;
        int poll_count = 0;
-
        arcmsr_free_sysfs_attr(acb);
        scsi_remove_host(host);
+       flush_scheduled_work();
+       del_timer_sync(&acb->eternal_timer);
+       arcmsr_disable_outbound_ints(acb);
        arcmsr_stop_adapter_bgrb(acb);
        arcmsr_flush_adapter_cache(acb);
-       arcmsr_disable_outbound_ints(acb);
        acb->acb_flags |= ACB_F_SCSISTOPADAPTER;
        acb->acb_flags &= ~ACB_F_IOP_INITED;
 
@@ -841,7 +928,9 @@ static void arcmsr_shutdown(struct pci_dev *pdev)
        struct Scsi_Host *host = pci_get_drvdata(pdev);
        struct AdapterControlBlock *acb =
                (struct AdapterControlBlock *)host->hostdata;
-
+       del_timer_sync(&acb->eternal_timer);
+       arcmsr_disable_outbound_ints(acb);
+       flush_scheduled_work();
        arcmsr_stop_adapter_bgrb(acb);
        arcmsr_flush_adapter_cache(acb);
 }
@@ -861,7 +950,7 @@ static void arcmsr_module_exit(void)
 module_init(arcmsr_module_init);
 module_exit(arcmsr_module_exit);
 
-static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb, \
+static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb,
                                                u32 intmask_org)
 {
        u32 mask;
@@ -871,7 +960,8 @@ static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb, \
        case ACB_ADAPTER_TYPE_A : {
                struct MessageUnit_A __iomem *reg = acb->pmuA;
                mask = intmask_org & ~(ARCMSR_MU_OUTBOUND_POSTQUEUE_INTMASKENABLE |
-                            ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE);
+                            ARCMSR_MU_OUTBOUND_DOORBELL_INTMASKENABLE|
+                            ARCMSR_MU_OUTBOUND_MESSAGE0_INTMASKENABLE);
                writel(mask, &reg->outbound_intmask);
                acb->outbound_int_enable = ~(intmask_org & mask) & 0x000000ff;
                }
@@ -879,8 +969,10 @@ static void arcmsr_enable_outbound_ints(struct AdapterControlBlock *acb, \
 
        case ACB_ADAPTER_TYPE_B : {
                struct MessageUnit_B *reg = acb->pmuB;
-               mask = intmask_org | (ARCMSR_IOP2DRV_DATA_WRITE_OK | \
-                       ARCMSR_IOP2DRV_DATA_READ_OK | ARCMSR_IOP2DRV_CDB_DONE);
+               mask = intmask_org | (ARCMSR_IOP2DRV_DATA_WRITE_OK |
+                       ARCMSR_IOP2DRV_DATA_READ_OK |
+                       ARCMSR_IOP2DRV_CDB_DONE |
+                       ARCMSR_IOP2DRV_MESSAGE_CMD_DONE);
                writel(mask, reg->iop2drv_doorbell_mask_reg);
                acb->outbound_int_enable = (intmask_org | mask) & 0x0000000f;
                }
@@ -1048,8 +1140,8 @@ static void arcmsr_free_ccb_pool(struct AdapterControlBlock *acb)
        }
        case ACB_ADAPTER_TYPE_B: {
                struct MessageUnit_B *reg = acb->pmuB;
-               iounmap(reg->drv2iop_doorbell_reg - ARCMSR_DRV2IOP_DOORBELL);
-               iounmap(reg->ioctl_wbuffer_reg - ARCMSR_IOCTL_WBUFFER);
+               iounmap((u8 *)reg->drv2iop_doorbell_reg - ARCMSR_DRV2IOP_DOORBELL);
+               iounmap((u8 *)reg->ioctl_wbuffer_reg - ARCMSR_IOCTL_WBUFFER);
                dma_free_coherent(&acb->pdev->dev,
                (ARCMSR_MAX_FREECCB_NUM * sizeof(struct CommandControlBlock) + 0x20 +
                sizeof(struct MessageUnit_B)), acb->dma_coherent, acb->dma_coherent_handle);
@@ -1249,13 +1341,36 @@ static void arcmsr_hbb_postqueue_isr(struct AdapterControlBlock *acb)
                reg->doneq_index = index;
        }
 }
+/*
+**********************************************************************************
+** Handle a message interrupt
+**
+** The only message interrupt we expect is the response to our query for the current adapter
+** config.  We use it to compare device maps so that newly attached or removed drives are detected.
+**********************************************************************************
+*/
+static void arcmsr_hba_message_isr(struct AdapterControlBlock *acb)
+{
+       struct MessageUnit_A *reg  = acb->pmuA;
+
+       /*clear interrupt and message state*/
+       writel(ARCMSR_MU_OUTBOUND_MESSAGE0_INT, &reg->outbound_intstatus);
+       schedule_work(&acb->arcmsr_do_message_isr_bh);
+}
+static void arcmsr_hbb_message_isr(struct AdapterControlBlock *acb)
+{
+       struct MessageUnit_B *reg  = acb->pmuB;
 
+       /*clear interrupt and message state*/
+       writel(ARCMSR_MESSAGE_INT_CLEAR_PATTERN, reg->iop2drv_doorbell_reg);
+       schedule_work(&acb->arcmsr_do_message_isr_bh);
+}
 static int arcmsr_handle_hba_isr(struct AdapterControlBlock *acb)
 {
        uint32_t outbound_intstatus;
        struct MessageUnit_A __iomem *reg = acb->pmuA;
 
-       outbound_intstatus = readl(&reg->outbound_intstatus) & \
+       outbound_intstatus = readl(&reg->outbound_intstatus) &
                                                        acb->outbound_int_enable;
        if (!(outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT))      {
                return 1;
@@ -1267,6 +1382,10 @@ static int arcmsr_handle_hba_isr(struct AdapterControlBlock *acb)
        if (outbound_intstatus & ARCMSR_MU_OUTBOUND_POSTQUEUE_INT) {
                arcmsr_hba_postqueue_isr(acb);
        }
+       if (outbound_intstatus & ARCMSR_MU_OUTBOUND_MESSAGE0_INT)       {
+               /* messenger of "driver to iop commands" */
+               arcmsr_hba_message_isr(acb);
+       }
        return 0;
 }
 
@@ -1275,13 +1394,14 @@ static int arcmsr_handle_hbb_isr(struct AdapterControlBlock *acb)
        uint32_t outbound_doorbell;
        struct MessageUnit_B *reg = acb->pmuB;
 
-       outbound_doorbell = readl(reg->iop2drv_doorbell_reg) & \
+       outbound_doorbell = readl(reg->iop2drv_doorbell_reg) &
                                                        acb->outbound_int_enable;
        if (!outbound_doorbell)
                return 1;
 
        writel(~outbound_doorbell, reg->iop2drv_doorbell_reg);
-       /*in case the last action of doorbell interrupt clearance is cached, this action can push HW to write down the clear bit*/
+       /* if the last doorbell-clearing write is still cached, this read
+       forces the hardware to actually write down the clear bit */
        readl(reg->iop2drv_doorbell_reg);
        writel(ARCMSR_DRV2IOP_END_OF_INTERRUPT, reg->drv2iop_doorbell_reg);
        if (outbound_doorbell & ARCMSR_IOP2DRV_DATA_WRITE_OK)   {
@@ -1293,6 +1413,10 @@ static int arcmsr_handle_hbb_isr(struct AdapterControlBlock *acb)
        if (outbound_doorbell & ARCMSR_IOP2DRV_CDB_DONE) {
                arcmsr_hbb_postqueue_isr(acb);
        }
+       if (outbound_doorbell & ARCMSR_IOP2DRV_MESSAGE_CMD_DONE) {
+               /* messenger of "driver to iop commands" */
+               arcmsr_hbb_message_isr(acb);
+       }
 
        return 0;
 }
@@ -1360,7 +1484,7 @@ void arcmsr_post_ioctldata2iop(struct AdapterControlBlock *acb)
        }
 }
 
-static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
+static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb,
                                        struct scsi_cmnd *cmd)
 {
        struct CMD_MESSAGE_FIELD *pcmdmessagefld;
@@ -1398,6 +1522,13 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
                        retvalue = ARCMSR_MESSAGE_FAIL;
                        goto message_out;
                }
+
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
+
                ptmpQbuffer = ver_addr;
                while ((acb->rqbuf_firstindex != acb->rqbuf_lastindex)
                        && (allxfer_len < 1031)) {
@@ -1444,6 +1575,12 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
                        retvalue = ARCMSR_MESSAGE_FAIL;
                        goto message_out;
                }
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
+
                ptmpuserbuffer = ver_addr;
                user_len = pcmdmessagefld->cmdmessage.Length;
                memcpy(ptmpuserbuffer, pcmdmessagefld->messagedatabuffer, user_len);
@@ -1496,6 +1633,11 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
 
        case ARCMSR_MESSAGE_CLEAR_RQBUFFER: {
                uint8_t *pQbuffer = acb->rqbuffer;
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
 
                if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
                        acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
@@ -1511,6 +1653,11 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
 
        case ARCMSR_MESSAGE_CLEAR_WQBUFFER: {
                uint8_t *pQbuffer = acb->wqbuffer;
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
 
                if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
                        acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
@@ -1529,6 +1676,11 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
 
        case ARCMSR_MESSAGE_CLEAR_ALLQBUFFER: {
                uint8_t *pQbuffer;
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
 
                if (acb->acb_flags & ACB_F_IOPDATA_OVERFLOW) {
                        acb->acb_flags &= ~ACB_F_IOPDATA_OVERFLOW;
@@ -1551,13 +1703,22 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
                break;
 
        case ARCMSR_MESSAGE_RETURN_CODE_3F: {
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
                pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_3F;
                }
                break;
 
        case ARCMSR_MESSAGE_SAY_HELLO: {
                int8_t *hello_string = "Hello! I am ARCMSR";
-
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
                memcpy(pcmdmessagefld->messagedatabuffer, hello_string
                        , (int16_t)strlen(hello_string));
                pcmdmessagefld->cmdmessage.ReturnCode = ARCMSR_MESSAGE_RETURNCODE_OK;
@@ -1565,10 +1726,20 @@ static int arcmsr_iop_message_xfer(struct AdapterControlBlock *acb, \
                break;
 
        case ARCMSR_MESSAGE_SAY_GOODBYE:
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
                arcmsr_iop_parking(acb);
                break;
 
        case ARCMSR_MESSAGE_FLUSH_ADAPTER_CACHE:
+               if (!acb->fw_state) {
+                       pcmdmessagefld->cmdmessage.ReturnCode =
+                       ARCMSR_MESSAGE_RETURNCODE_BUS_HANG_ON;
+                       goto message_out;
+               }
                arcmsr_flush_adapter_cache(acb);
                break;
 
@@ -1651,16 +1822,57 @@ static int arcmsr_queue_command(struct scsi_cmnd *cmd,
        struct CommandControlBlock *ccb;
        int target = cmd->device->id;
        int lun = cmd->device->lun;
-
+       uint8_t scsicmd = cmd->cmnd[0];
        cmd->scsi_done = done;
        cmd->host_scribble = NULL;
        cmd->result = 0;
+
+       if ((scsicmd == SYNCHRONIZE_CACHE) || (scsicmd == SEND_DIAGNOSTIC)) {
+               if (acb->devstate[target][lun] == ARECA_RAID_GONE) {
+                       cmd->result = (DID_NO_CONNECT << 16);
+               }
+               cmd->scsi_done(cmd);
+               return 0;
+       }
+
        if (acb->acb_flags & ACB_F_BUS_RESET) {
-               printk(KERN_NOTICE "arcmsr%d: bus reset"
-                       " and return busy \n"
-                       , acb->host->host_no);
+               switch (acb->adapter_type) {
+                       case ACB_ADAPTER_TYPE_A: {
+                               struct MessageUnit_A __iomem *reg = acb->pmuA;
+                               uint32_t intmask_org, outbound_doorbell;
+
+                               if ((readl(&reg->outbound_msgaddr1) &
+                                       ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK) == 0) {
+                                       printk(KERN_NOTICE "arcmsr%d: bus reset and return busy\n",
+                                               acb->host->host_no);
                return SCSI_MLQUEUE_HOST_BUSY;
        }
+
+                               acb->acb_flags &= ~ACB_F_FIRMWARE_TRAP;
+                               printk(KERN_NOTICE "arcmsr%d: hardware bus reset and reset ok\n",
+                                       acb->host->host_no);
+                               /* disable all outbound interrupt */
+                               intmask_org = arcmsr_disable_outbound_ints(acb);
+                               arcmsr_get_firmware_spec(acb, 1);
+                               /*start background rebuild*/
+                               arcmsr_start_adapter_bgrb(acb);
+                               /* clear Qbuffer if door bell ringed */
+                               outbound_doorbell = readl(&reg->outbound_doorbell);
+                               /*clear interrupt */
+                               writel(outbound_doorbell, &reg->outbound_doorbell);
+                               writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK,
+                                       &reg->inbound_doorbell);
+                               /* enable outbound Post Queue,outbound doorbell Interrupt */
+                               arcmsr_enable_outbound_ints(acb, intmask_org);
+                               acb->acb_flags |= ACB_F_IOP_INITED;
+                               acb->acb_flags &= ~ACB_F_BUS_RESET;
+                       }
+                       break;
+                       case ACB_ADAPTER_TYPE_B: {
+                       }
+               }
+       }
+
        if (target == 16) {
                /* virtual device for iop message transfer */
                arcmsr_handle_virtual_command(acb, cmd);
@@ -1699,21 +1911,25 @@ static int arcmsr_queue_command(struct scsi_cmnd *cmd,
        return 0;
 }
 
-static void arcmsr_get_hba_config(struct AdapterControlBlock *acb)
+static void *arcmsr_get_hba_config(struct AdapterControlBlock *acb, int mode)
 {
        struct MessageUnit_A __iomem *reg = acb->pmuA;
        char *acb_firm_model = acb->firm_model;
        char *acb_firm_version = acb->firm_version;
+       char *acb_device_map = acb->device_map;
        char __iomem *iop_firm_model = (char __iomem *)(&reg->message_rwbuffer[15]);
        char __iomem *iop_firm_version = (char __iomem *)(&reg->message_rwbuffer[17]);
+       char __iomem *iop_device_map = (char __iomem *) (&reg->message_rwbuffer[21]);
        int count;
 
        writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, &reg->inbound_msgaddr0);
        if (arcmsr_hba_wait_msgint_ready(acb)) {
                printk(KERN_NOTICE "arcmsr%d: wait 'get adapter firmware \
                        miscellaneous data' timeout \n", acb->host->host_no);
+               return NULL;
        }
 
+       if (mode == 1) {
        count = 8;
        while (count) {
                *acb_firm_model = readb(iop_firm_model);
@@ -1730,34 +1946,48 @@ static void arcmsr_get_hba_config(struct AdapterControlBlock *acb)
                count--;
        }
 
+               count = 16;
+               while (count) {
+                       *acb_device_map = readb(iop_device_map);
+                       acb_device_map++;
+                       iop_device_map++;
+                       count--;
+               }
+
        printk(KERN_INFO        "ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n"
                , acb->host->host_no
                , acb->firm_version);
-
+               acb->signature = readl(&reg->message_rwbuffer[0]);
        acb->firm_request_len = readl(&reg->message_rwbuffer[1]);
        acb->firm_numbers_queue = readl(&reg->message_rwbuffer[2]);
        acb->firm_sdram_size = readl(&reg->message_rwbuffer[3]);
        acb->firm_hd_channels = readl(&reg->message_rwbuffer[4]);
 }
-
-static void arcmsr_get_hbb_config(struct AdapterControlBlock *acb)
+       return reg->message_rwbuffer;
+}
+static void __iomem *arcmsr_get_hbb_config(struct AdapterControlBlock *acb, int mode)
 {
        struct MessageUnit_B *reg = acb->pmuB;
        uint32_t __iomem *lrwbuffer = reg->msgcode_rwbuffer_reg;
        char *acb_firm_model = acb->firm_model;
        char *acb_firm_version = acb->firm_version;
+       char *acb_device_map = acb->device_map;
        char __iomem *iop_firm_model = (char __iomem *)(&lrwbuffer[15]);
        /*firm_model,15,60-67*/
        char __iomem *iop_firm_version = (char __iomem *)(&lrwbuffer[17]);
        /*firm_version,17,68-83*/
+       char __iomem *iop_device_map = (char __iomem *) (&lrwbuffer[21]);
+       /*firm_version,21,84-99*/
        int count;
 
        writel(ARCMSR_MESSAGE_GET_CONFIG, reg->drv2iop_doorbell_reg);
        if (arcmsr_hbb_wait_msgint_ready(acb)) {
                printk(KERN_NOTICE "arcmsr%d: wait 'get adapter firmware \
                        miscellaneous data' timeout \n", acb->host->host_no);
+               return NULL;
        }
 
+       if (mode == 1) {
        count = 8;
        while (count)
        {
@@ -1776,11 +2006,20 @@ static void arcmsr_get_hbb_config(struct AdapterControlBlock *acb)
                count--;
        }
 
+               count = 16;
+               while (count) {
+                       *acb_device_map = readb(iop_device_map);
+                       acb_device_map++;
+                       iop_device_map++;
+                       count--;
+               }
+
        printk(KERN_INFO "ARECA RAID ADAPTER%d: FIRMWARE VERSION %s \n",
                        acb->host->host_no,
                        acb->firm_version);
 
-       lrwbuffer++;
+               acb->signature = readl(lrwbuffer++);
+               /*firm_signature,1,00-03*/
        acb->firm_request_len = readl(lrwbuffer++);
        /*firm_request_len,1,04-07*/
        acb->firm_numbers_queue = readl(lrwbuffer++);
@@ -1790,20 +2029,23 @@ static void arcmsr_get_hbb_config(struct AdapterControlBlock *acb)
        acb->firm_hd_channels = readl(lrwbuffer);
        /*firm_ide_channels,4,16-19*/
 }
-
-static void arcmsr_get_firmware_spec(struct AdapterControlBlock *acb)
+       return reg->msgcode_rwbuffer_reg;
+}
+static void *arcmsr_get_firmware_spec(struct AdapterControlBlock *acb, int mode)
 {
+       void *rtnval = 0;
        switch (acb->adapter_type) {
        case ACB_ADAPTER_TYPE_A: {
-               arcmsr_get_hba_config(acb);
+               rtnval = arcmsr_get_hba_config(acb, mode);
                }
                break;
 
        case ACB_ADAPTER_TYPE_B: {
-               arcmsr_get_hbb_config(acb);
+               rtnval = arcmsr_get_hbb_config(acb, mode);
                }
                break;
        }
+       return rtnval;
 }
 
 static void arcmsr_polling_hba_ccbdone(struct AdapterControlBlock *acb,
@@ -2043,6 +2285,66 @@ static void arcmsr_wait_firmware_ready(struct AdapterControlBlock *acb)
        }
 }
 
+static void arcmsr_request_hba_device_map(struct AdapterControlBlock *acb)
+{
+       struct MessageUnit_A __iomem *reg = acb->pmuA;
+
+       if (unlikely(atomic_read(&acb->rq_map_token) == 0)) {
+               acb->fw_state = false;
+       } else {
+       /* prevent rq_map_token from being changed by another interrupt,
+       which would otherwise risk a deadlock */
+               acb->fw_state = true;
+               atomic_dec(&acb->rq_map_token);
+               if (!(acb->fw_state) ||
+                       (acb->ante_token_value == atomic_read(&acb->rq_map_token))) {
+                       atomic_set(&acb->rq_map_token, 16);
+               }
+               acb->ante_token_value = atomic_read(&acb->rq_map_token);
+               writel(ARCMSR_INBOUND_MESG0_GET_CONFIG, &reg->inbound_msgaddr0);
+       }
+       mod_timer(&acb->eternal_timer, jiffies + msecs_to_jiffies(6000));
+       return;
+}
+
+static void arcmsr_request_hbb_device_map(struct AdapterControlBlock *acb)
+{
+       struct MessageUnit_B __iomem *reg = acb->pmuB;
+
+       if (unlikely(atomic_read(&acb->rq_map_token) == 0)) {
+               acb->fw_state = false;
+       } else {
+       /* prevent rq_map_token from being changed by another interrupt,
+       which would otherwise risk a deadlock */
+               acb->fw_state = true;
+               atomic_dec(&acb->rq_map_token);
+               if (!(acb->fw_state) ||
+                       (acb->ante_token_value == atomic_read(&acb->rq_map_token))) {
+                       atomic_set(&acb->rq_map_token, 16);
+               }
+               acb->ante_token_value = atomic_read(&acb->rq_map_token);
+               writel(ARCMSR_MESSAGE_GET_CONFIG, reg->drv2iop_doorbell_reg);
+       }
+       mod_timer(&acb->eternal_timer, jiffies + msecs_to_jiffies(6000));
+       return;
+}
+
+static void arcmsr_request_device_map(unsigned long pacb)
+{
+       struct AdapterControlBlock *acb = (struct AdapterControlBlock *)pacb;
+
+       switch (acb->adapter_type) {
+               case ACB_ADAPTER_TYPE_A: {
+                       arcmsr_request_hba_device_map(acb);
+               }
+               break;
+               case ACB_ADAPTER_TYPE_B: {
+                       arcmsr_request_hbb_device_map(acb);
+               }
+               break;
+       }
+}
+
 static void arcmsr_start_hba_bgrb(struct AdapterControlBlock *acb)
 {
        struct MessageUnit_A __iomem *reg = acb->pmuA;
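
arcmsr_request_device_map() above is a timer callback that re-arms itself with mod_timer() every 6 seconds, posting a GET_CONFIG message whose answer lands in the message ISR; rq_map_token acts as a watchdog that declares the firmware dead (fw_state = false) once replies stop refilling it. (The probe and reset paths arm the same timer with msecs_to_jiffies(10*HZ) and msecs_to_jiffies(20*HZ), which reads like a unit mix-up next to the 6000 ms used by the re-arm paths.) A sketch of the self-re-arming pattern on this kernel generation's timer API, with hypothetical "watchdog" names:

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list watchdog;

static void watchdog_fn(unsigned long data)
{
	/* post the GET_CONFIG request, check the reply token, ... */

	/* re-arm: fire again in 6 seconds, as the driver does */
	mod_timer(&watchdog, jiffies + msecs_to_jiffies(6000));
}

static void watchdog_start(unsigned long cookie)
{
	init_timer(&watchdog);
	watchdog.function = watchdog_fn;
	watchdog.data = cookie;
	watchdog.expires = jiffies + msecs_to_jiffies(6000);
	add_timer(&watchdog);
}
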
@@ -2121,6 +2423,60 @@ static void arcmsr_enable_eoi_mode(struct AdapterControlBlock *acb)
        return;
 }
 
+static void arcmsr_hardware_reset(struct AdapterControlBlock *acb)
+{
+       uint8_t value[64];
+       int i;
+
+       /* backup pci config data */
+       for (i = 0; i < 64; i++) {
+               pci_read_config_byte(acb->pdev, i, &value[i]);
+       }
+       /* hardware reset signal */
+       pci_write_config_byte(acb->pdev, 0x84, 0x20);
+       msleep(1000);
+       /* write back pci config data */
+       for (i = 0; i < 64; i++) {
+               pci_write_config_byte(acb->pdev, i, value[i]);
+       }
+       msleep(1000);
+       return;
+}
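
arcmsr_hardware_reset() strobes a device-specific reset through config register 0x84, so it snapshots and restores the first 64 bytes of config space around the strobe by hand. The PCI core has generic helpers for that snapshot; a sketch under the assumption that the same 0x84 strobe applies:

#include <linux/pci.h>
#include <linux/delay.h>

static void example_hw_reset(struct pci_dev *pdev)
{
	pci_save_state(pdev);			/* snapshot the config header */
	pci_write_config_byte(pdev, 0x84, 0x20);	/* reset strobe */
	msleep(1000);
	pci_restore_state(pdev);		/* write the snapshot back */
	msleep(1000);
}
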
+/*
+****************************************************************************
+****************************************************************************
+*/
+#ifdef CONFIG_SCSI_ARCMSR_RESET
+int arcmsr_sleep_for_bus_reset(struct scsi_cmnd *cmd)
+{
+       struct Scsi_Host *shost = NULL;
+       spinlock_t *host_lock = NULL;
+       int i, isleep;
+
+       shost = cmd->device->host;
+       host_lock = shost->host_lock;
+
+       printk(KERN_NOTICE "Host %d bus reset over, sleep %d seconds (busy %d, can queue %d) ...........\n",
+               shost->host_no, sleeptime, shost->host_busy, shost->can_queue);
+       isleep = sleeptime / 10;
+       spin_unlock_irq(host_lock);
+       if (isleep > 0) {
+               for (i = 0; i < isleep; i++) {
+                       msleep(10000);
+                       printk(KERN_NOTICE "^%d^\n", i);
+               }
+       }
+
+       isleep = sleeptime % 10;
+       if (isleep > 0) {
+               msleep(isleep * 1000);
+               printk(KERN_NOTICE "^v^\n");
+       }
+       spin_lock_irq(host_lock);
+       printk(KERN_NOTICE "***** wake up *****\n");
+       return 0;
+}
+#endif
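
The helper above sleeps for up to sleeptime seconds in the error-handler path, so it first drops the host lock: msleep() schedules, and sleeping while holding a spinlock would be a bug. The skeleton of that unlock/sleep/relock shape, as a hypothetical helper:

#include <linux/spinlock.h>
#include <linux/delay.h>
#include <scsi/scsi_host.h>

/* Hypothetical helper: the unlock/sleep/relock skeleton. */
static void example_sleep_unlocked(struct Scsi_Host *shost, unsigned int secs)
{
	spin_unlock_irq(shost->host_lock);	/* msleep() may schedule */
	msleep(secs * 1000);
	spin_lock_irq(shost->host_lock);	/* restore caller's state */
}
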
 static void arcmsr_iop_init(struct AdapterControlBlock *acb)
 {
        uint32_t intmask_org;
@@ -2129,7 +2485,7 @@ static void arcmsr_iop_init(struct AdapterControlBlock *acb)
        intmask_org = arcmsr_disable_outbound_ints(acb);
        arcmsr_wait_firmware_ready(acb);
        arcmsr_iop_confirm(acb);
-       arcmsr_get_firmware_spec(acb);
+       arcmsr_get_firmware_spec(acb, 1);
        /*start background rebuild*/
        arcmsr_start_adapter_bgrb(acb);
        /* empty doorbell Qbuffer if door bell ringed */
@@ -2140,51 +2496,110 @@ static void arcmsr_iop_init(struct AdapterControlBlock *acb)
        acb->acb_flags |= ACB_F_IOP_INITED;
 }
 
-static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+static uint8_t arcmsr_iop_reset(struct AdapterControlBlock *acb)
 {
        struct CommandControlBlock *ccb;
        uint32_t intmask_org;
+       uint8_t rtnval = 0x00;
        int i = 0;
 
        if (atomic_read(&acb->ccboutstandingcount) != 0) {
+               /* disable all outbound interrupt */
+               intmask_org = arcmsr_disable_outbound_ints(acb);
                /* talk to iop 331 outstanding command aborted */
-               arcmsr_abort_allcmd(acb);
-
+               rtnval = arcmsr_abort_allcmd(acb);
                /* wait for 3 sec for all command aborted*/
                ssleep(3);
-
-               /* disable all outbound interrupt */
-               intmask_org = arcmsr_disable_outbound_ints(acb);
                /* clear all outbound posted Q */
                arcmsr_done4abort_postqueue(acb);
                for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
                        ccb = acb->pccb_pool[i];
                        if (ccb->startdone == ARCMSR_CCB_START) {
-                               ccb->startdone = ARCMSR_CCB_ABORTED;
                                arcmsr_ccb_complete(ccb, 1);
                        }
                }
+               atomic_set(&acb->ccboutstandingcount, 0);
                /* enable all outbound interrupt */
                arcmsr_enable_outbound_ints(acb, intmask_org);
+               return rtnval;
        }
+       return rtnval;
 }
 
 static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
 {
        struct AdapterControlBlock *acb =
                (struct AdapterControlBlock *)cmd->device->host->hostdata;
-       int i;
+       int retry = 0;
 
-       acb->num_resets++;
+       if (acb->acb_flags & ACB_F_BUS_RESET)
+               return SUCCESS;
+
+       printk(KERN_NOTICE "arcmsr%d: bus reset ..... \n", acb->adapter_index);
        acb->acb_flags |= ACB_F_BUS_RESET;
-       for (i = 0; i < 400; i++) {
-               if (!atomic_read(&acb->ccboutstandingcount))
+       acb->num_resets++;
+       while (atomic_read(&acb->ccboutstandingcount) != 0 && retry < 4) {
+               arcmsr_interrupt(acb);
+               retry++;
+       }
+
+       if (arcmsr_iop_reset(acb)) {
+               switch (acb->adapter_type) {
+               case ACB_ADAPTER_TYPE_A: {
+                       printk(KERN_NOTICE "arcmsr%d: do hardware bus reset, num_resets = %d num_aborts = %d \n",
+                               acb->adapter_index, acb->num_resets, acb->num_aborts);
+                       arcmsr_hardware_reset(acb);
+                       acb->acb_flags |= ACB_F_FIRMWARE_TRAP;
+                       acb->acb_flags &= ~ACB_F_IOP_INITED;
+                       #ifdef CONFIG_SCSI_ARCMSR_RESET
+                       struct MessageUnit_A __iomem *reg = acb->pmuA;
+                       uint32_t intmask_org, outbound_doorbell;
+                       int retry_count = 0;
+sleep_again:
+                       arcmsr_sleep_for_bus_reset(cmd);
+                       if ((readl(&reg->outbound_msgaddr1) &
+                            ARCMSR_OUTBOUND_MESG1_FIRMWARE_OK) == 0) {
+                               printk(KERN_NOTICE "arcmsr%d: hardware bus reset and return busy, retry=%d \n",
+                                       acb->host->host_no, retry_count);
+                               if (retry_count > retrycount) {
+                                       printk(KERN_NOTICE "arcmsr%d: hardware bus reset and return busy, retry aborted \n",
+                                               acb->host->host_no);
+                                       return SUCCESS;
+                               }
+                               retry_count++;
+                               goto sleep_again;
+                       }
+                       acb->acb_flags &= ~ACB_F_FIRMWARE_TRAP;
+                       acb->acb_flags |= ACB_F_IOP_INITED;
+                       acb->acb_flags &= ~ACB_F_BUS_RESET;
+                       printk(KERN_NOTICE "arcmsr%d: hardware bus reset and reset ok \n",
+                               acb->host->host_no);
+                       /* disable all outbound interrupt */
+                       intmask_org = arcmsr_disable_outbound_ints(acb);
+                       arcmsr_get_firmware_spec(acb, 1);
+                       /*start background rebuild*/
+                       arcmsr_start_adapter_bgrb(acb);
+                       /* clear Qbuffer if door bell ringed */
+                       outbound_doorbell = readl(&reg->outbound_doorbell);
+                       writel(outbound_doorbell, &reg->outbound_doorbell); /*clear interrupt */
+                       writel(ARCMSR_INBOUND_DRIVER_DATA_READ_OK, &reg->inbound_doorbell);
+                       /* enable outbound Post Queue,outbound doorbell Interrupt */
+                       arcmsr_enable_outbound_ints(acb, intmask_org);
+                       atomic_set(&acb->rq_map_token, 16);
+                       init_timer(&acb->eternal_timer);
+                       acb->eternal_timer.expires = jiffies + msecs_to_jiffies(20*HZ);
+                       acb->eternal_timer.data = (unsigned long) acb;
+                       acb->eternal_timer.function = &arcmsr_request_device_map;
+                       add_timer(&acb->eternal_timer);
+                       #endif
+               }
                        break;
-               arcmsr_interrupt(acb);/* FIXME: need spinlock */
-               msleep(25);
+               case ACB_ADAPTER_TYPE_B: {
        }
-       arcmsr_iop_reset(acb);
+               }
+       } else {
        acb->acb_flags &= ~ACB_F_BUS_RESET;
+       }
        return SUCCESS;
 }
 
@@ -2277,98 +2692,3 @@ static const char *arcmsr_info(struct Scsi_Host *host)
                        ARCMSR_DRIVER_VERSION);
        return buf;
 }
-#ifdef CONFIG_SCSI_ARCMSR_AER
-static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev)
-{
-       struct Scsi_Host *host = pci_get_drvdata(pdev);
-       struct AdapterControlBlock *acb =
-               (struct AdapterControlBlock *) host->hostdata;
-       uint32_t intmask_org;
-       int i, j;
-
-       if (pci_enable_device(pdev)) {
-               return PCI_ERS_RESULT_DISCONNECT;
-       }
-       pci_set_master(pdev);
-       intmask_org = arcmsr_disable_outbound_ints(acb);
-       acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
-                          ACB_F_MESSAGE_RQBUFFER_CLEARED |
-                          ACB_F_MESSAGE_WQBUFFER_READED);
-       acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
-       for (i = 0; i < ARCMSR_MAX_TARGETID; i++)
-               for (j = 0; j < ARCMSR_MAX_TARGETLUN; j++)
-                       acb->devstate[i][j] = ARECA_RAID_GONE;
-
-       arcmsr_wait_firmware_ready(acb);
-       arcmsr_iop_confirm(acb);
-       /* disable all outbound interrupt */
-       arcmsr_get_firmware_spec(acb);
-       /*start background rebuild*/
-       arcmsr_start_adapter_bgrb(acb);
-       /* empty doorbell Qbuffer if door bell ringed */
-       arcmsr_clear_doorbell_queue_buffer(acb);
-       arcmsr_enable_eoi_mode(acb);
-       /* enable outbound Post Queue,outbound doorbell Interrupt */
-       arcmsr_enable_outbound_ints(acb, intmask_org);
-       acb->acb_flags |= ACB_F_IOP_INITED;
-
-       pci_enable_pcie_error_reporting(pdev);
-       return PCI_ERS_RESULT_RECOVERED;
-}
-
-static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev)
-{
-       struct Scsi_Host *host = pci_get_drvdata(pdev);
-       struct AdapterControlBlock *acb = (struct AdapterControlBlock *)host->hostdata;
-       struct CommandControlBlock *ccb;
-       uint32_t intmask_org;
-       int i = 0;
-
-       if (atomic_read(&acb->ccboutstandingcount) != 0) {
-               /* talk to iop 331 outstanding command aborted */
-               arcmsr_abort_allcmd(acb);
-               /* wait for 3 sec for all command aborted*/
-               ssleep(3);
-               /* disable all outbound interrupt */
-               intmask_org = arcmsr_disable_outbound_ints(acb);
-               /* clear all outbound posted Q */
-               arcmsr_done4abort_postqueue(acb);
-               for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
-                       ccb = acb->pccb_pool[i];
-                       if (ccb->startdone == ARCMSR_CCB_START) {
-                               ccb->startdone = ARCMSR_CCB_ABORTED;
-                               arcmsr_ccb_complete(ccb, 1);
-                       }
-               }
-               /* enable all outbound interrupt */
-               arcmsr_enable_outbound_ints(acb, intmask_org);
-       }
-       pci_disable_device(pdev);
-}
-
-static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev)
-{
-                       struct Scsi_Host *host = pci_get_drvdata(pdev);
-                       struct AdapterControlBlock *acb = \
-                               (struct AdapterControlBlock *)host->hostdata;
-
-                       arcmsr_stop_adapter_bgrb(acb);
-                       arcmsr_flush_adapter_cache(acb);
-}
-
-static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
-                                               pci_channel_state_t state)
-{
-       switch (state) {
-       case pci_channel_io_frozen:
-                       arcmsr_pci_ers_need_reset_forepart(pdev);
-                       return PCI_ERS_RESULT_NEED_RESET;
-       case pci_channel_io_perm_failure:
-                       arcmsr_pci_ers_disconnect_forepart(pdev);
-                       return PCI_ERS_RESULT_DISCONNECT;
-                       break;
-       default:
-                       return PCI_ERS_RESULT_NEED_RESET;
-         }
-}
-#endif
index e641922..350cbea 100644 (file)
@@ -167,10 +167,9 @@ unsigned char mgmt_invalidate_icds(struct beiscsi_hba *phba,
                                &nonemb_cmd.dma);
        if (nonemb_cmd.va == NULL) {
                SE_DEBUG(DBG_LVL_1,
-                        "Failed to allocate memory for"
-                        "mgmt_invalidate_icds \n");
+                        "Failed to allocate memory for mgmt_invalidate_icds\n");
                spin_unlock(&ctrl->mbox_lock);
-               return -1;
+               return 0;
        }
        nonemb_cmd.size = sizeof(struct invalidate_commands_params_in);
        req = nonemb_cmd.va;
index 0c08e18..3a7b3f8 100644 (file)
@@ -84,11 +84,32 @@ bfa_cfg_get_meminfo(struct bfa_iocfc_cfg_s *cfg, struct bfa_meminfo_s *meminfo)
        for (i = 0; hal_mods[i]; i++)
                hal_mods[i]->meminfo(cfg, &km_len, &dm_len);
 
+       dm_len += bfa_port_meminfo();
 
        meminfo->meminfo[BFA_MEM_TYPE_KVA - 1].mem_len = km_len;
        meminfo->meminfo[BFA_MEM_TYPE_DMA - 1].mem_len = dm_len;
 }
 
+static void
+bfa_com_port_attach(struct bfa_s *bfa, struct bfa_meminfo_s *mi)
+{
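+       /*
+        * Claim the port module's slice of the DMA-able memory pool and
+        * advance the pool's virtual/physical cursors so later users
+        * allocate past it.
+        */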
+       struct bfa_port_s       *port = &bfa->modules.port;
+       uint32_t                dm_len;
+       uint8_t                 *dm_kva;
+       uint64_t                dm_pa;
+
+       dm_len = bfa_port_meminfo();
+       dm_kva = bfa_meminfo_dma_virt(mi);
+       dm_pa  = bfa_meminfo_dma_phys(mi);
+
+       memset(port, 0, sizeof(struct bfa_port_s));
+       bfa_port_attach(port, &bfa->ioc, bfa, bfa->trcmod, bfa->logm);
+       bfa_port_mem_claim(port, dm_kva, dm_pa);
+
+       bfa_meminfo_dma_virt(mi) = dm_kva + dm_len;
+       bfa_meminfo_dma_phys(mi) = dm_pa + dm_len;
+}
+
 /**
  * Use this function to attach the driver instance to the BFA
  * library. This function will not trigger any HW initialization
@@ -140,6 +161,7 @@ bfa_attach(struct bfa_s *bfa, void *bfad, struct bfa_iocfc_cfg_s *cfg,
        for (i = 0; hal_mods[i]; i++)
                hal_mods[i]->attach(bfa, bfad, cfg, meminfo, pcidev);
 
+       bfa_com_port_attach(bfa, meminfo);
 }
 
 /**
index 18b7102..2ce26eb 100644 (file)
@@ -1,36 +1,35 @@
 #include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/zorro.h>
 
-#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/amigaints.h>
 #include <asm/amigahw.h>
-#include <linux/zorro.h>
-#include <asm/irq.h>
-#include <linux/spinlock.h>
 
 #include "scsi.h"
-#include <scsi/scsi_host.h>
 #include "wd33c93.h"
 #include "gvp11.h"
 
-#include <linux/stat.h>
 
+#define CHECK_WD33C93
 
-#define DMA(ptr)       ((gvp11_scsiregs *)((ptr)->base))
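+/*
+ * Private per-host data: the generic WD33C93 state plus a pointer to the
+ * board registers, replacing the old DMA() macro on instance->base.
+ */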
+struct gvp11_hostdata {
+       struct WD33C93_hostdata wh;
+       struct gvp11_scsiregs *regs;
+};
 
-static irqreturn_t gvp11_intr(int irq, void *_instance)
+static irqreturn_t gvp11_intr(int irq, void *data)
 {
+       struct Scsi_Host *instance = data;
+       struct gvp11_hostdata *hdata = shost_priv(instance);
+       unsigned int status = hdata->regs->CNTR;
        unsigned long flags;
-       unsigned int status;
-       struct Scsi_Host *instance = (struct Scsi_Host *)_instance;
 
-       status = DMA(instance)->CNTR;
        if (!(status & GVP11_DMAC_INT_PENDING))
                return IRQ_NONE;
 
@@ -50,64 +49,66 @@ void gvp11_setup(char *str, int *ints)
 static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
        struct Scsi_Host *instance = cmd->device->host;
-       struct WD33C93_hostdata *hdata = shost_priv(instance);
+       struct gvp11_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct gvp11_scsiregs *regs = hdata->regs;
        unsigned short cntr = GVP11_DMAC_INT_ENABLE;
        unsigned long addr = virt_to_bus(cmd->SCp.ptr);
        int bank_mask;
        static int scsi_alloc_out_of_range = 0;
 
        /* use bounce buffer if the physical address is bad */
-       if (addr & hdata->dma_xfer_mask) {
-               hdata->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
+       if (addr & wh->dma_xfer_mask) {
+               wh->dma_bounce_len = (cmd->SCp.this_residual + 511) & ~0x1ff;
 
                if (!scsi_alloc_out_of_range) {
-                       hdata->dma_bounce_buffer =
-                               kmalloc(hdata->dma_bounce_len, GFP_KERNEL);
-                       hdata->dma_buffer_pool = BUF_SCSI_ALLOCED;
+                       wh->dma_bounce_buffer =
+                               kmalloc(wh->dma_bounce_len, GFP_KERNEL);
+                       wh->dma_buffer_pool = BUF_SCSI_ALLOCED;
                }
 
                if (scsi_alloc_out_of_range ||
-                   !hdata->dma_bounce_buffer) {
-                       hdata->dma_bounce_buffer =
-                               amiga_chip_alloc(hdata->dma_bounce_len,
+                   !wh->dma_bounce_buffer) {
+                       wh->dma_bounce_buffer =
+                               amiga_chip_alloc(wh->dma_bounce_len,
                                                 "GVP II SCSI Bounce Buffer");
 
-                       if (!hdata->dma_bounce_buffer) {
-                               hdata->dma_bounce_len = 0;
+                       if (!wh->dma_bounce_buffer) {
+                               wh->dma_bounce_len = 0;
                                return 1;
                        }
 
-                       hdata->dma_buffer_pool = BUF_CHIP_ALLOCED;
+                       wh->dma_buffer_pool = BUF_CHIP_ALLOCED;
                }
 
                /* check if the address of the bounce buffer is OK */
-               addr = virt_to_bus(hdata->dma_bounce_buffer);
+               addr = virt_to_bus(wh->dma_bounce_buffer);
 
-               if (addr & hdata->dma_xfer_mask) {
+               if (addr & wh->dma_xfer_mask) {
                        /* fall back to Chip RAM if address out of range */
-                       if (hdata->dma_buffer_pool == BUF_SCSI_ALLOCED) {
-                               kfree(hdata->dma_bounce_buffer);
+                       if (wh->dma_buffer_pool == BUF_SCSI_ALLOCED) {
+                               kfree(wh->dma_bounce_buffer);
                                scsi_alloc_out_of_range = 1;
                        } else {
-                               amiga_chip_free(hdata->dma_bounce_buffer);
+                               amiga_chip_free(wh->dma_bounce_buffer);
                        }
 
-                       hdata->dma_bounce_buffer =
-                               amiga_chip_alloc(hdata->dma_bounce_len,
+                       wh->dma_bounce_buffer =
+                               amiga_chip_alloc(wh->dma_bounce_len,
                                                 "GVP II SCSI Bounce Buffer");
 
-                       if (!hdata->dma_bounce_buffer) {
-                               hdata->dma_bounce_len = 0;
+                       if (!wh->dma_bounce_buffer) {
+                               wh->dma_bounce_len = 0;
                                return 1;
                        }
 
-                       addr = virt_to_bus(hdata->dma_bounce_buffer);
-                       hdata->dma_buffer_pool = BUF_CHIP_ALLOCED;
+                       addr = virt_to_bus(wh->dma_bounce_buffer);
+                       wh->dma_buffer_pool = BUF_CHIP_ALLOCED;
                }
 
                if (!dir_in) {
                        /* copy to bounce buffer for a write */
-                       memcpy(hdata->dma_bounce_buffer, cmd->SCp.ptr,
+                       memcpy(wh->dma_bounce_buffer, cmd->SCp.ptr,
                               cmd->SCp.this_residual);
                }
        }
@@ -116,11 +117,11 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
        if (!dir_in)
                cntr |= GVP11_DMAC_DIR_WRITE;
 
-       hdata->dma_dir = dir_in;
-       DMA(cmd->device->host)->CNTR = cntr;
+       wh->dma_dir = dir_in;
+       regs->CNTR = cntr;
 
        /* setup DMA *physical* address */
-       DMA(cmd->device->host)->ACR = addr;
+       regs->ACR = addr;
 
        if (dir_in) {
                /* invalidate any cache */
@@ -130,12 +131,12 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
                cache_push(addr, cmd->SCp.this_residual);
        }
 
-       bank_mask = (~hdata->dma_xfer_mask >> 18) & 0x01c0;
+       bank_mask = (~wh->dma_xfer_mask >> 18) & 0x01c0;
        if (bank_mask)
-               DMA(cmd->device->host)->BANK = bank_mask & (addr >> 18);
+               regs->BANK = bank_mask & (addr >> 18);
 
        /* start DMA */
-       DMA(cmd->device->host)->ST_DMA = 1;
+       regs->ST_DMA = 1;
 
        /* return success */
        return 0;
@@ -144,236 +145,53 @@ static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
                     int status)
 {
-       struct WD33C93_hostdata *hdata = shost_priv(instance);
+       struct gvp11_hostdata *hdata = shost_priv(instance);
+       struct WD33C93_hostdata *wh = &hdata->wh;
+       struct gvp11_scsiregs *regs = hdata->regs;
 
        /* stop DMA */
-       DMA(instance)->SP_DMA = 1;
+       regs->SP_DMA = 1;
        /* remove write bit from CONTROL bits */
-       DMA(instance)->CNTR = GVP11_DMAC_INT_ENABLE;
+       regs->CNTR = GVP11_DMAC_INT_ENABLE;
 
        /* copy from a bounce buffer, if necessary */
-       if (status && hdata->dma_bounce_buffer) {
-               if (hdata->dma_dir && SCpnt)
-                       memcpy(SCpnt->SCp.ptr, hdata->dma_bounce_buffer,
+       if (status && wh->dma_bounce_buffer) {
+               if (wh->dma_dir && SCpnt)
+                       memcpy(SCpnt->SCp.ptr, wh->dma_bounce_buffer,
                               SCpnt->SCp.this_residual);
 
-               if (hdata->dma_buffer_pool == BUF_SCSI_ALLOCED)
-                       kfree(hdata->dma_bounce_buffer);
-               else
-                       amiga_chip_free(hdata->dma_bounce_buffer);
-
-               hdata->dma_bounce_buffer = NULL;
-               hdata->dma_bounce_len = 0;
-       }
-}
-
-#define CHECK_WD33C93
-
-int __init gvp11_detect(struct scsi_host_template *tpnt)
-{
-       static unsigned char called = 0;
-       struct Scsi_Host *instance;
-       unsigned long address;
-       unsigned int epc;
-       struct zorro_dev *z = NULL;
-       unsigned int default_dma_xfer_mask;
-       struct WD33C93_hostdata *hdata;
-       wd33c93_regs regs;
-       int num_gvp11 = 0;
-#ifdef CHECK_WD33C93
-       volatile unsigned char *sasr_3393, *scmd_3393;
-       unsigned char save_sasr;
-       unsigned char q, qq;
-#endif
-
-       if (!MACH_IS_AMIGA || called)
-               return 0;
-       called = 1;
-
-       tpnt->proc_name = "GVP11";
-       tpnt->proc_info = &wd33c93_proc_info;
-
-       while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
-               /*
-                * This should (hopefully) be the correct way to identify
-                * all the different GVP SCSI controllers (except for the
-                * SERIES I though).
-                */
-
-               if (z->id == ZORRO_PROD_GVP_COMBO_030_R3_SCSI ||
-                   z->id == ZORRO_PROD_GVP_SERIES_II)
-                       default_dma_xfer_mask = ~0x00ffffff;
-               else if (z->id == ZORRO_PROD_GVP_GFORCE_030_SCSI ||
-                        z->id == ZORRO_PROD_GVP_A530_SCSI ||
-                        z->id == ZORRO_PROD_GVP_COMBO_030_R4_SCSI)
-                       default_dma_xfer_mask = ~0x01ffffff;
-               else if (z->id == ZORRO_PROD_GVP_A1291 ||
-                        z->id == ZORRO_PROD_GVP_GFORCE_040_SCSI_1)
-                       default_dma_xfer_mask = ~0x07ffffff;
+               if (wh->dma_buffer_pool == BUF_SCSI_ALLOCED)
+                       kfree(wh->dma_bounce_buffer);
                else
-                       continue;
-
-               /*
-                * Rumors state that some GVP ram boards use the same product
-                * code as the SCSI controllers. Therefore if the board-size
-                * is not 64KB we asume it is a ram board and bail out.
-                */
-               if (z->resource.end - z->resource.start != 0xffff)
-                       continue;
+                       amiga_chip_free(wh->dma_bounce_buffer);
 
-               address = z->resource.start;
-               if (!request_mem_region(address, 256, "wd33c93"))
-                       continue;
-
-#ifdef CHECK_WD33C93
-
-               /*
-                * These darn GVP boards are a problem - it can be tough to tell
-                * whether or not they include a SCSI controller. This is the
-                * ultimate Yet-Another-GVP-Detection-Hack in that it actually
-                * probes for a WD33c93 chip: If we find one, it's extremely
-                * likely that this card supports SCSI, regardless of Product_
-                * Code, Board_Size, etc.
-                */
-
-               /* Get pointers to the presumed register locations and save contents */
-
-               sasr_3393 = &(((gvp11_scsiregs *)(ZTWO_VADDR(address)))->SASR);
-               scmd_3393 = &(((gvp11_scsiregs *)(ZTWO_VADDR(address)))->SCMD);
-               save_sasr = *sasr_3393;
-
-               /* First test the AuxStatus Reg */
-
-               q = *sasr_3393; /* read it */
-               if (q & 0x08)   /* bit 3 should always be clear */
-                       goto release;
-               *sasr_3393 = WD_AUXILIARY_STATUS;       /* setup indirect address */
-               if (*sasr_3393 == WD_AUXILIARY_STATUS) {        /* shouldn't retain the write */
-                       *sasr_3393 = save_sasr; /* Oops - restore this byte */
-                       goto release;
-               }
-               if (*sasr_3393 != q) {  /* should still read the same */
-                       *sasr_3393 = save_sasr; /* Oops - restore this byte */
-                       goto release;
-               }
-               if (*scmd_3393 != q)    /* and so should the image at 0x1f */
-                       goto release;
-
-               /*
-                * Ok, we probably have a wd33c93, but let's check a few other places
-                * for good measure. Make sure that this works for both 'A and 'B
-                * chip versions.
-                */
-
-               *sasr_3393 = WD_SCSI_STATUS;
-               q = *scmd_3393;
-               *sasr_3393 = WD_SCSI_STATUS;
-               *scmd_3393 = ~q;
-               *sasr_3393 = WD_SCSI_STATUS;
-               qq = *scmd_3393;
-               *sasr_3393 = WD_SCSI_STATUS;
-               *scmd_3393 = q;
-               if (qq != q)    /* should be read only */
-                       goto release;
-               *sasr_3393 = 0x1e;      /* this register is unimplemented */
-               q = *scmd_3393;
-               *sasr_3393 = 0x1e;
-               *scmd_3393 = ~q;
-               *sasr_3393 = 0x1e;
-               qq = *scmd_3393;
-               *sasr_3393 = 0x1e;
-               *scmd_3393 = q;
-               if (qq != q || qq != 0xff)      /* should be read only, all 1's */
-                       goto release;
-               *sasr_3393 = WD_TIMEOUT_PERIOD;
-               q = *scmd_3393;
-               *sasr_3393 = WD_TIMEOUT_PERIOD;
-               *scmd_3393 = ~q;
-               *sasr_3393 = WD_TIMEOUT_PERIOD;
-               qq = *scmd_3393;
-               *sasr_3393 = WD_TIMEOUT_PERIOD;
-               *scmd_3393 = q;
-               if (qq != (~q & 0xff))  /* should be read/write */
-                       goto release;
-#endif
-
-               instance = scsi_register(tpnt, sizeof(struct WD33C93_hostdata));
-               if (instance == NULL)
-                       goto release;
-               instance->base = ZTWO_VADDR(address);
-               instance->irq = IRQ_AMIGA_PORTS;
-               instance->unique_id = z->slotaddr;
-
-               hdata = shost_priv(instance);
-               if (gvp11_xfer_mask)
-                       hdata->dma_xfer_mask = gvp11_xfer_mask;
-               else
-                       hdata->dma_xfer_mask = default_dma_xfer_mask;
-
-               DMA(instance)->secret2 = 1;
-               DMA(instance)->secret1 = 0;
-               DMA(instance)->secret3 = 15;
-               while (DMA(instance)->CNTR & GVP11_DMAC_BUSY)
-                       ;
-               DMA(instance)->CNTR = 0;
-
-               DMA(instance)->BANK = 0;
-
-               epc = *(unsigned short *)(ZTWO_VADDR(address) + 0x8000);
-
-               /*
-                * Check for 14MHz SCSI clock
-                */
-               regs.SASR = &(DMA(instance)->SASR);
-               regs.SCMD = &(DMA(instance)->SCMD);
-               hdata->no_sync = 0xff;
-               hdata->fast = 0;
-               hdata->dma_mode = CTRL_DMA;
-               wd33c93_init(instance, regs, dma_setup, dma_stop,
-                            (epc & GVP_SCSICLKMASK) ? WD33C93_FS_8_10
-                                                    : WD33C93_FS_12_15);
-
-               if (request_irq(IRQ_AMIGA_PORTS, gvp11_intr, IRQF_SHARED,
-                               "GVP11 SCSI", instance))
-                       goto unregister;
-               DMA(instance)->CNTR = GVP11_DMAC_INT_ENABLE;
-               num_gvp11++;
-               continue;
-
-unregister:
-               scsi_unregister(instance);
-release:
-               release_mem_region(address, 256);
+               wh->dma_bounce_buffer = NULL;
+               wh->dma_bounce_len = 0;
        }
-
-       return num_gvp11;
 }
 
 static int gvp11_bus_reset(struct scsi_cmnd *cmd)
 {
+       struct Scsi_Host *instance = cmd->device->host;
+
        /* FIXME perform bus-specific reset */
 
        /* FIXME 2: shouldn't we no-op this function (return
           FAILED), and fall back to host reset function,
           wd33c93_host_reset ? */
 
-       spin_lock_irq(cmd->device->host->host_lock);
+       spin_lock_irq(instance->host_lock);
        wd33c93_host_reset(cmd);
-       spin_unlock_irq(cmd->device->host->host_lock);
+       spin_unlock_irq(instance->host_lock);
 
        return SUCCESS;
 }
 
-
-#define HOSTS_C
-
-#include "gvp11.h"
-
-static struct scsi_host_template driver_template = {
-       .proc_name              = "GVP11",
+static struct scsi_host_template gvp11_scsi_template = {
+       .module                 = THIS_MODULE,
        .name                   = "GVP Series II SCSI",
-       .detect                 = gvp11_detect,
-       .release                = gvp11_release,
+       .proc_info              = wd33c93_proc_info,
+       .proc_name              = "GVP11",
        .queuecommand           = wd33c93_queuecommand,
        .eh_abort_handler       = wd33c93_abort,
        .eh_bus_reset_handler   = gvp11_bus_reset,
@@ -385,17 +203,230 @@ static struct scsi_host_template driver_template = {
        .use_clustering         = DISABLE_CLUSTERING
 };
 
+static int __devinit check_wd33c93(struct gvp11_scsiregs *regs)
+{
+#ifdef CHECK_WD33C93
+       volatile unsigned char *sasr_3393, *scmd_3393;
+       unsigned char save_sasr;
+       unsigned char q, qq;
 
-#include "scsi_module.c"
+       /*
+        * These darn GVP boards are a problem - it can be tough to tell
+        * whether or not they include a SCSI controller. This is the
+        * ultimate Yet-Another-GVP-Detection-Hack in that it actually
+        * probes for a WD33c93 chip: If we find one, it's extremely
+        * likely that this card supports SCSI, regardless of Product_
+        * Code, Board_Size, etc.
+        */
+
+       /* Get pointers to the presumed register locations and save contents */
+
+       sasr_3393 = &regs->SASR;
+       scmd_3393 = &regs->SCMD;
+       save_sasr = *sasr_3393;
+
+       /* First test the AuxStatus Reg */
+
+       q = *sasr_3393; /* read it */
+       if (q & 0x08)   /* bit 3 should always be clear */
+               return -ENODEV;
+       *sasr_3393 = WD_AUXILIARY_STATUS;       /* setup indirect address */
+       if (*sasr_3393 == WD_AUXILIARY_STATUS) {        /* shouldn't retain the write */
+               *sasr_3393 = save_sasr; /* Oops - restore this byte */
+               return -ENODEV;
+       }
+       if (*sasr_3393 != q) {  /* should still read the same */
+               *sasr_3393 = save_sasr; /* Oops - restore this byte */
+               return -ENODEV;
+       }
+       if (*scmd_3393 != q)    /* and so should the image at 0x1f */
+               return -ENODEV;
+
+       /*
+        * Ok, we probably have a wd33c93, but let's check a few other places
+        * for good measure. Make sure that this works for both 'A and 'B
+        * chip versions.
+        */
+
+       *sasr_3393 = WD_SCSI_STATUS;
+       q = *scmd_3393;
+       *sasr_3393 = WD_SCSI_STATUS;
+       *scmd_3393 = ~q;
+       *sasr_3393 = WD_SCSI_STATUS;
+       qq = *scmd_3393;
+       *sasr_3393 = WD_SCSI_STATUS;
+       *scmd_3393 = q;
+       if (qq != q)    /* should be read only */
+               return -ENODEV;
+       *sasr_3393 = 0x1e;      /* this register is unimplemented */
+       q = *scmd_3393;
+       *sasr_3393 = 0x1e;
+       *scmd_3393 = ~q;
+       *sasr_3393 = 0x1e;
+       qq = *scmd_3393;
+       *sasr_3393 = 0x1e;
+       *scmd_3393 = q;
+       if (qq != q || qq != 0xff)      /* should be read only, all 1's */
+               return -ENODEV;
+       *sasr_3393 = WD_TIMEOUT_PERIOD;
+       q = *scmd_3393;
+       *sasr_3393 = WD_TIMEOUT_PERIOD;
+       *scmd_3393 = ~q;
+       *sasr_3393 = WD_TIMEOUT_PERIOD;
+       qq = *scmd_3393;
+       *sasr_3393 = WD_TIMEOUT_PERIOD;
+       *scmd_3393 = q;
+       if (qq != (~q & 0xff))  /* should be read/write */
+               return -ENODEV;
+#endif /* CHECK_WD33C93 */
 
-int gvp11_release(struct Scsi_Host *instance)
+       return 0;
+}
+
+static int __devinit gvp11_probe(struct zorro_dev *z,
+                                const struct zorro_device_id *ent)
 {
-#ifdef MODULE
-       DMA(instance)->CNTR = 0;
-       release_mem_region(ZTWO_PADDR(instance->base), 256);
+       struct Scsi_Host *instance;
+       unsigned long address;
+       int error;
+       unsigned int epc;
+       unsigned int default_dma_xfer_mask;
+       struct gvp11_hostdata *hdata;
+       struct gvp11_scsiregs *regs;
+       wd33c93_regs wdregs;
+
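+       /* the board family's DMA transfer mask arrives via the zorro table's driver_data */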
+       default_dma_xfer_mask = ent->driver_data;
+
+       /*
+        * Rumors state that some GVP ram boards use the same product
+        * code as the SCSI controllers. Therefore, if the board size
+        * is not 64KB, we assume it is a RAM board and bail out.
+        */
+       if (zorro_resource_len(z) != 0x10000)
+               return -ENODEV;
+
+       address = z->resource.start;
+       if (!request_mem_region(address, 256, "wd33c93"))
+               return -EBUSY;
+
+       regs = (struct gvp11_scsiregs *)(ZTWO_VADDR(address));
+
+       error = check_wd33c93(regs);
+       if (error)
+               goto fail_check_or_alloc;
+
+       instance = scsi_host_alloc(&gvp11_scsi_template,
+                                  sizeof(struct gvp11_hostdata));
+       if (!instance) {
+               error = -ENOMEM;
+               goto fail_check_or_alloc;
+       }
+
+       instance->irq = IRQ_AMIGA_PORTS;
+       instance->unique_id = z->slotaddr;
+
+       regs->secret2 = 1;
+       regs->secret1 = 0;
+       regs->secret3 = 15;
+       while (regs->CNTR & GVP11_DMAC_BUSY)
+               ;
+       regs->CNTR = 0;
+       regs->BANK = 0;
+
+       wdregs.SASR = &regs->SASR;
+       wdregs.SCMD = &regs->SCMD;
+
+       hdata = shost_priv(instance);
+       if (gvp11_xfer_mask)
+               hdata->wh.dma_xfer_mask = gvp11_xfer_mask;
+       else
+               hdata->wh.dma_xfer_mask = default_dma_xfer_mask;
+
+       hdata->wh.no_sync = 0xff;
+       hdata->wh.fast = 0;
+       hdata->wh.dma_mode = CTRL_DMA;
+       hdata->regs = regs;
+
+       /*
+        * Check for 14MHz SCSI clock
+        */
+       epc = *(unsigned short *)(ZTWO_VADDR(address) + 0x8000);
+       wd33c93_init(instance, wdregs, dma_setup, dma_stop,
+                    (epc & GVP_SCSICLKMASK) ? WD33C93_FS_8_10
+                                            : WD33C93_FS_12_15);
+
+       error = request_irq(IRQ_AMIGA_PORTS, gvp11_intr, IRQF_SHARED,
+                           "GVP11 SCSI", instance);
+       if (error)
+               goto fail_irq;
+
+       regs->CNTR = GVP11_DMAC_INT_ENABLE;
+
+       error = scsi_add_host(instance, NULL);
+       if (error)
+               goto fail_host;
+
+       zorro_set_drvdata(z, instance);
+       scsi_scan_host(instance);
+       return 0;
+
+fail_host:
        free_irq(IRQ_AMIGA_PORTS, instance);
-#endif
-       return 1;
+fail_irq:
+       scsi_host_put(instance);
+fail_check_or_alloc:
+       release_mem_region(address, 256);
+       return error;
+}
+
+static void __devexit gvp11_remove(struct zorro_dev *z)
+{
+       struct Scsi_Host *instance = zorro_get_drvdata(z);
+       struct gvp11_hostdata *hdata = shost_priv(instance);
+
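+       /* quiesce the DMA controller, then unwind in reverse order of probe */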
+       hdata->regs->CNTR = 0;
+       scsi_remove_host(instance);
+       free_irq(IRQ_AMIGA_PORTS, instance);
+       scsi_host_put(instance);
+       release_mem_region(z->resource.start, 256);
+}
+
+/*
+ * This should (hopefully) be the correct way to identify all the different
+ * GVP SCSI controllers (except for the SERIES I though).
+ */
+
+static struct zorro_device_id gvp11_zorro_tbl[] __devinitdata = {
+       { ZORRO_PROD_GVP_COMBO_030_R3_SCSI,     ~0x00ffffff },
+       { ZORRO_PROD_GVP_SERIES_II,             ~0x00ffffff },
+       { ZORRO_PROD_GVP_GFORCE_030_SCSI,       ~0x01ffffff },
+       { ZORRO_PROD_GVP_A530_SCSI,             ~0x01ffffff },
+       { ZORRO_PROD_GVP_COMBO_030_R4_SCSI,     ~0x01ffffff },
+       { ZORRO_PROD_GVP_A1291,                 ~0x07ffffff },
+       { ZORRO_PROD_GVP_GFORCE_040_SCSI_1,     ~0x07ffffff },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(zorro, gvp11_zorro_tbl);
+
+static struct zorro_driver gvp11_driver = {
+       .name           = "gvp11",
+       .id_table       = gvp11_zorro_tbl,
+       .probe          = gvp11_probe,
+       .remove         = __devexit_p(gvp11_remove),
+};
+
+static int __init gvp11_init(void)
+{
+       return zorro_register_driver(&gvp11_driver);
+}
+module_init(gvp11_init);
+
+static void __exit gvp11_exit(void)
+{
+       zorro_unregister_driver(&gvp11_driver);
 }
+module_exit(gvp11_exit);
 
+MODULE_DESCRIPTION("GVP Series II SCSI");
 MODULE_LICENSE("GPL");
index e2efdf9..852913c 100644 (file)
@@ -11,9 +11,6 @@
 
 #include <linux/types.h>
 
-int gvp11_detect(struct scsi_host_template *);
-int gvp11_release(struct Scsi_Host *);
-
 #ifndef CMD_PER_LUN
 #define CMD_PER_LUN            2
 #endif
@@ -22,15 +19,13 @@ int gvp11_release(struct Scsi_Host *);
 #define CAN_QUEUE              16
 #endif
 
-#ifndef HOSTS_C
-
 /*
  * if the transfer address ANDed with this results in a non-zero
  * result, then we can't use DMA.
  */
 #define GVP11_XFER_MASK                (0xff000001)
 
-typedef struct {
+struct gvp11_scsiregs {
                 unsigned char  pad1[64];
        volatile unsigned short CNTR;
                 unsigned char  pad2[31];
@@ -46,7 +41,7 @@ typedef struct {
        volatile unsigned short SP_DMA;
        volatile unsigned short secret2; /* store 1 here */
        volatile unsigned short secret3; /* store 15 here */
-} gvp11_scsiregs;
+};
 
 /* bits in CNTR */
 #define GVP11_DMAC_BUSY                (1<<0)
@@ -54,6 +49,4 @@ typedef struct {
 #define GVP11_DMAC_INT_ENABLE  (1<<3)
 #define GVP11_DMAC_DIR_WRITE   (1<<4)
 
-#endif /* else def HOSTS_C */
-
 #endif /* GVP11_H */
index 6a6661c..82ea4a8 100644 (file)
@@ -567,7 +567,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd,
 static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
-       struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
+       struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
+       struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64;
        dma_addr_t dma_addr = ipr_cmd->dma_addr;
 
        memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt));
@@ -576,19 +577,19 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd)
        ioarcb->ioadl_len = 0;
        ioarcb->read_ioadl_len = 0;
 
-       if (ipr_cmd->ioa_cfg->sis64)
+       if (ipr_cmd->ioa_cfg->sis64) {
                ioarcb->u.sis64_addr_data.data_ioadl_addr =
                        cpu_to_be64(dma_addr + offsetof(struct ipr_cmnd, i.ioadl64));
-       else {
+               ioasa64->u.gata.status = 0;
+       } else {
                ioarcb->write_ioadl_addr =
                        cpu_to_be32(dma_addr + offsetof(struct ipr_cmnd, i.ioadl));
                ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
+               ioasa->u.gata.status = 0;
        }
 
-       ioasa->ioasc = 0;
-       ioasa->residual_data_len = 0;
-       ioasa->u.gata.status = 0;
-
+       ioasa->hdr.ioasc = 0;
+       ioasa->hdr.residual_data_len = 0;
        ipr_cmd->scsi_cmd = NULL;
        ipr_cmd->qc = NULL;
        ipr_cmd->sense_buffer[0] = 0;
@@ -768,8 +769,8 @@ static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg)
        list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) {
                list_del(&ipr_cmd->queue);
 
-               ipr_cmd->ioasa.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
-               ipr_cmd->ioasa.ilid = cpu_to_be32(IPR_DRIVER_ILID);
+               ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
+               ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID);
 
                if (ipr_cmd->scsi_cmd)
                        ipr_cmd->done = ipr_scsi_eh_done;
@@ -1040,7 +1041,7 @@ static void ipr_init_res_entry(struct ipr_resource_entry *res,
                proto = cfgtew->u.cfgte64->proto;
                res->res_flags = cfgtew->u.cfgte64->res_flags;
                res->qmodel = IPR_QUEUEING_MODEL64(res);
-               res->type = cfgtew->u.cfgte64->res_type & 0x0f;
+               res->type = cfgtew->u.cfgte64->res_type;
 
                memcpy(res->res_path, &cfgtew->u.cfgte64->res_path,
                        sizeof(res->res_path));
@@ -1319,7 +1320,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct ipr_hostrcb *hostrcb = ipr_cmd->u.hostrcb;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        list_del(&hostrcb->queue);
        list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
@@ -2354,7 +2355,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct ipr_hostrcb *hostrcb = ipr_cmd->u.hostrcb;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
        u32 fd_ioasc;
 
        if (ioa_cfg->sis64)
@@ -4509,11 +4510,16 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg,
        }
 
        ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT);
-       ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
        list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
-       if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET)
-               memcpy(&res->sata_port->ioasa, &ipr_cmd->ioasa.u.gata,
-                      sizeof(struct ipr_ioasa_gata));
+       if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) {
+               if (ipr_cmd->ioa_cfg->sis64)
+                       memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
+                              sizeof(struct ipr_ioasa_gata));
+               else
+                       memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa.u.gata,
+                              sizeof(struct ipr_ioasa_gata));
+       }
 
        LEAVE;
        return (IPR_IOASC_SENSE_KEY(ioasc) ? -EIO : 0);
@@ -4768,7 +4774,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd)
        scmd_printk(KERN_ERR, scsi_cmd, "Aborting command: %02X\n",
                    scsi_cmd->cmnd[0]);
        ipr_send_blocking_cmd(ipr_cmd, ipr_abort_timeout, IPR_CANCEL_ALL_TIMEOUT);
-       ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        /*
         * If the abort task timed out and we sent a bus reset, we will get
@@ -4812,15 +4818,39 @@ static int ipr_eh_abort(struct scsi_cmnd * scsi_cmd)
 /**
  * ipr_handle_other_interrupt - Handle "other" interrupts
  * @ioa_cfg:   ioa config struct
- * @int_reg:   interrupt register
  *
  * Return value:
  *     IRQ_NONE / IRQ_HANDLED
  **/
-static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
-                                             volatile u32 int_reg)
+static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg)
 {
        irqreturn_t rc = IRQ_HANDLED;
+       volatile u32 int_reg, int_mask_reg;
+
+       int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg32);
+       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32) & ~int_mask_reg;
+
+       /* If an interrupt on the adapter did not occur, ignore it.
+        * Or in the case of SIS 64, check for a stage change interrupt.
+        */
+       if ((int_reg & IPR_PCII_OPER_INTERRUPTS) == 0) {
+               if (ioa_cfg->sis64) {
+                       int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg);
+                       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
+                       if (int_reg & IPR_PCII_IPL_STAGE_CHANGE) {
+
+                               /* clear stage change */
+                               writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_reg);
+                               int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
+                               list_del(&ioa_cfg->reset_cmd->queue);
+                               del_timer(&ioa_cfg->reset_cmd->timer);
+                               ipr_reset_ioa_job(ioa_cfg->reset_cmd);
+                               return IRQ_HANDLED;
+                       }
+               }
+
+               return IRQ_NONE;
+       }
 
        if (int_reg & IPR_PCII_IOA_TRANS_TO_OPER) {
                /* Mask the interrupt */
@@ -4881,7 +4911,7 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 {
        struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp;
        unsigned long lock_flags = 0;
-       volatile u32 int_reg, int_mask_reg;
+       volatile u32 int_reg;
        u32 ioasc;
        u16 cmd_index;
        int num_hrrq = 0;
@@ -4896,33 +4926,6 @@ static irqreturn_t ipr_isr(int irq, void *devp)
                return IRQ_NONE;
        }
 
-       int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg32);
-       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32) & ~int_mask_reg;
-
-       /* If an interrupt on the adapter did not occur, ignore it.
-        * Or in the case of SIS 64, check for a stage change interrupt.
-        */
-       if (unlikely((int_reg & IPR_PCII_OPER_INTERRUPTS) == 0)) {
-               if (ioa_cfg->sis64) {
-                       int_mask_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg);
-                       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
-                       if (int_reg & IPR_PCII_IPL_STAGE_CHANGE) {
-
-                               /* clear stage change */
-                               writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_reg);
-                               int_reg = readl(ioa_cfg->regs.sense_interrupt_reg) & ~int_mask_reg;
-                               list_del(&ioa_cfg->reset_cmd->queue);
-                               del_timer(&ioa_cfg->reset_cmd->timer);
-                               ipr_reset_ioa_job(ioa_cfg->reset_cmd);
-                               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-                               return IRQ_HANDLED;
-                       }
-               }
-
-               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-               return IRQ_NONE;
-       }
-
        while (1) {
                ipr_cmd = NULL;
 
@@ -4940,7 +4943,7 @@ static irqreturn_t ipr_isr(int irq, void *devp)
 
                        ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index];
 
-                       ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+                       ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
                        ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc);
 
@@ -4962,7 +4965,7 @@ static irqreturn_t ipr_isr(int irq, void *devp)
                        /* Clear the PCI interrupt */
                        do {
                                writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32);
-                               int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32) & ~int_mask_reg;
+                               int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
                        } while (int_reg & IPR_PCII_HRRQ_UPDATED &&
                                        num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
 
@@ -4977,7 +4980,7 @@ static irqreturn_t ipr_isr(int irq, void *devp)
        }
 
        if (unlikely(rc == IRQ_NONE))
-               rc = ipr_handle_other_interrupt(ioa_cfg, int_reg);
+               rc = ipr_handle_other_interrupt(ioa_cfg);
 
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
        return rc;
@@ -5014,6 +5017,10 @@ static int ipr_build_ioadl64(struct ipr_ioa_cfg *ioa_cfg,
 
        ipr_cmd->dma_use_sg = nseg;
 
+       ioarcb->data_transfer_length = cpu_to_be32(length);
+       ioarcb->ioadl_len =
+               cpu_to_be32(sizeof(struct ipr_ioadl64_desc) * ipr_cmd->dma_use_sg);
+
        if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) {
                ioadl_flags = IPR_IOADL_FLAGS_WRITE;
                ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_WRITE_NOT_READ;
@@ -5135,7 +5142,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
        struct ipr_resource_entry *res = scsi_cmd->device->hostdata;
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        if (IPR_IOASC_SENSE_KEY(ioasc) > 0) {
                scsi_cmd->result |= (DID_ERROR << 16);
@@ -5166,7 +5173,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
 static void ipr_reinit_ipr_cmnd_for_erp(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
-       struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
+       struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
        dma_addr_t dma_addr = ipr_cmd->dma_addr;
 
        memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt));
@@ -5174,8 +5181,8 @@ static void ipr_reinit_ipr_cmnd_for_erp(struct ipr_cmnd *ipr_cmd)
        ioarcb->read_data_transfer_length = 0;
        ioarcb->ioadl_len = 0;
        ioarcb->read_ioadl_len = 0;
-       ioasa->ioasc = 0;
-       ioasa->residual_data_len = 0;
+       ioasa->hdr.ioasc = 0;
+       ioasa->hdr.residual_data_len = 0;
 
        if (ipr_cmd->ioa_cfg->sis64)
                ioarcb->u.sis64_addr_data.data_ioadl_addr =
@@ -5200,7 +5207,7 @@ static void ipr_reinit_ipr_cmnd_for_erp(struct ipr_cmnd *ipr_cmd)
 static void ipr_erp_request_sense(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_cmd_pkt *cmd_pkt = &ipr_cmd->ioarcb.cmd_pkt;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        if (IPR_IOASC_SENSE_KEY(ioasc) > 0) {
                ipr_erp_done(ipr_cmd);
@@ -5277,12 +5284,12 @@ static void ipr_dump_ioasa(struct ipr_ioa_cfg *ioa_cfg,
        int i;
        u16 data_len;
        u32 ioasc, fd_ioasc;
-       struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
+       struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
        __be32 *ioasa_data = (__be32 *)ioasa;
        int error_index;
 
-       ioasc = be32_to_cpu(ioasa->ioasc) & IPR_IOASC_IOASC_MASK;
-       fd_ioasc = be32_to_cpu(ioasa->fd_ioasc) & IPR_IOASC_IOASC_MASK;
+       ioasc = be32_to_cpu(ioasa->hdr.ioasc) & IPR_IOASC_IOASC_MASK;
+       fd_ioasc = be32_to_cpu(ioasa->hdr.fd_ioasc) & IPR_IOASC_IOASC_MASK;
 
        if (0 == ioasc)
                return;
@@ -5297,7 +5304,7 @@ static void ipr_dump_ioasa(struct ipr_ioa_cfg *ioa_cfg,
 
        if (ioa_cfg->log_level < IPR_MAX_LOG_LEVEL) {
                /* Don't log an error if the IOA already logged one */
-               if (ioasa->ilid != 0)
+               if (ioasa->hdr.ilid != 0)
                        return;
 
                if (!ipr_is_gscsi(res))
@@ -5309,10 +5316,11 @@ static void ipr_dump_ioasa(struct ipr_ioa_cfg *ioa_cfg,
 
        ipr_res_err(ioa_cfg, res, "%s\n", ipr_error_table[error_index].error);
 
-       if (sizeof(struct ipr_ioasa) < be16_to_cpu(ioasa->ret_stat_len))
+       data_len = be16_to_cpu(ioasa->hdr.ret_stat_len);
+       if (ioa_cfg->sis64 && sizeof(struct ipr_ioasa64) < data_len)
+               data_len = sizeof(struct ipr_ioasa64);
+       else if (!ioa_cfg->sis64 && sizeof(struct ipr_ioasa) < data_len)
                data_len = sizeof(struct ipr_ioasa);
-       else
-               data_len = be16_to_cpu(ioasa->ret_stat_len);
 
        ipr_err("IOASA Dump:\n");
 
@@ -5338,8 +5346,8 @@ static void ipr_gen_sense(struct ipr_cmnd *ipr_cmd)
        u32 failing_lba;
        u8 *sense_buf = ipr_cmd->scsi_cmd->sense_buffer;
        struct ipr_resource_entry *res = ipr_cmd->scsi_cmd->device->hostdata;
-       struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
-       u32 ioasc = be32_to_cpu(ioasa->ioasc);
+       struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
+       u32 ioasc = be32_to_cpu(ioasa->hdr.ioasc);
 
        memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
 
@@ -5382,7 +5390,7 @@ static void ipr_gen_sense(struct ipr_cmnd *ipr_cmd)
 
                /* Illegal request */
                if ((IPR_IOASC_SENSE_KEY(ioasc) == 0x05) &&
-                   (be32_to_cpu(ioasa->ioasc_specific) & IPR_FIELD_POINTER_VALID)) {
+                   (be32_to_cpu(ioasa->hdr.ioasc_specific) & IPR_FIELD_POINTER_VALID)) {
                        sense_buf[7] = 10;      /* additional length */
 
                        /* IOARCB was in error */
@@ -5393,10 +5401,10 @@ static void ipr_gen_sense(struct ipr_cmnd *ipr_cmd)
 
                        sense_buf[16] =
                            ((IPR_FIELD_POINTER_MASK &
-                             be32_to_cpu(ioasa->ioasc_specific)) >> 8) & 0xff;
+                             be32_to_cpu(ioasa->hdr.ioasc_specific)) >> 8) & 0xff;
                        sense_buf[17] =
                            (IPR_FIELD_POINTER_MASK &
-                            be32_to_cpu(ioasa->ioasc_specific)) & 0xff;
+                            be32_to_cpu(ioasa->hdr.ioasc_specific)) & 0xff;
                } else {
                        if (ioasc == IPR_IOASC_MED_DO_NOT_REALLOC) {
                                if (ipr_is_vset_device(res))
@@ -5428,14 +5436,20 @@ static void ipr_gen_sense(struct ipr_cmnd *ipr_cmd)
  **/
 static int ipr_get_autosense(struct ipr_cmnd *ipr_cmd)
 {
-       struct ipr_ioasa *ioasa = &ipr_cmd->ioasa;
+       struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
+       struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64;
 
-       if ((be32_to_cpu(ioasa->ioasc_specific) & IPR_AUTOSENSE_VALID) == 0)
+       if ((be32_to_cpu(ioasa->hdr.ioasc_specific) & IPR_AUTOSENSE_VALID) == 0)
                return 0;
 
-       memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa->auto_sense.data,
-              min_t(u16, be16_to_cpu(ioasa->auto_sense.auto_sense_len),
-                  SCSI_SENSE_BUFFERSIZE));
+       if (ipr_cmd->ioa_cfg->sis64)
+               memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa64->auto_sense.data,
+                      min_t(u16, be16_to_cpu(ioasa64->auto_sense.auto_sense_len),
+                          SCSI_SENSE_BUFFERSIZE));
+       else
+               memcpy(ipr_cmd->scsi_cmd->sense_buffer, ioasa->auto_sense.data,
+                      min_t(u16, be16_to_cpu(ioasa->auto_sense.auto_sense_len),
+                          SCSI_SENSE_BUFFERSIZE));
        return 1;
 }
 
@@ -5455,7 +5469,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
 {
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
        struct ipr_resource_entry *res = scsi_cmd->device->hostdata;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
        u32 masked_ioasc = ioasc & IPR_IOASC_IOASC_MASK;
 
        if (!res) {
@@ -5547,9 +5561,9 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
-       scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->ioasa.residual_data_len));
+       scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len));
 
        if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) {
                scsi_dma_unmap(ipr_cmd->scsi_cmd);
@@ -5839,19 +5853,23 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd)
        struct ata_queued_cmd *qc = ipr_cmd->qc;
        struct ipr_sata_port *sata_port = qc->ap->private_data;
        struct ipr_resource_entry *res = sata_port->res;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
-       memcpy(&sata_port->ioasa, &ipr_cmd->ioasa.u.gata,
-              sizeof(struct ipr_ioasa_gata));
+       if (ipr_cmd->ioa_cfg->sis64)
+               memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
+                      sizeof(struct ipr_ioasa_gata));
+       else
+               memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa.u.gata,
+                      sizeof(struct ipr_ioasa_gata));
        ipr_dump_ioasa(ioa_cfg, ipr_cmd, res);
 
-       if (be32_to_cpu(ipr_cmd->ioasa.ioasc_specific) & IPR_ATA_DEVICE_WAS_RESET)
+       if (be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc_specific) & IPR_ATA_DEVICE_WAS_RESET)
                scsi_report_device_reset(ioa_cfg->host, res->bus, res->target);
 
        if (IPR_IOASC_SENSE_KEY(ioasc) > RECOVERED_ERROR)
-               qc->err_mask |= __ac_err_mask(ipr_cmd->ioasa.u.gata.status);
+               qc->err_mask |= __ac_err_mask(sata_port->ioasa.status);
        else
-               qc->err_mask |= ac_err_mask(ipr_cmd->ioasa.u.gata.status);
+               qc->err_mask |= ac_err_mask(sata_port->ioasa.status);
        list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
        ata_qc_complete(qc);
 }
@@ -6520,7 +6538,7 @@ static void ipr_build_mode_sense(struct ipr_cmnd *ipr_cmd,
 static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        dev_err(&ioa_cfg->pdev->dev,
                "0x%02X failed with IOASC: 0x%08X\n",
@@ -6544,7 +6562,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd)
 static int ipr_reset_mode_sense_failed(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        if (ioasc == IPR_IOASC_IR_INVALID_REQ_TYPE_OR_PKT) {
                ipr_cmd->job_step = ipr_set_supported_devs;
@@ -6634,7 +6652,7 @@ static int ipr_ioafp_mode_select_page24(struct ipr_cmnd *ipr_cmd)
  **/
 static int ipr_reset_mode_sense_page24_failed(struct ipr_cmnd *ipr_cmd)
 {
-       u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+       u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        if (ioasc == IPR_IOASC_IR_INVALID_REQ_TYPE_OR_PKT) {
                ipr_cmd->job_step = ipr_ioafp_mode_sense_page28;
@@ -6706,7 +6724,7 @@ static int ipr_init_res_table(struct ipr_cmnd *ipr_cmd)
                list_move_tail(&res->queue, &old_res);
 
        if (ioa_cfg->sis64)
-               entries = ioa_cfg->u.cfg_table64->hdr64.num_entries;
+               entries = be16_to_cpu(ioa_cfg->u.cfg_table64->hdr64.num_entries);
        else
                entries = ioa_cfg->u.cfg_table->hdr.num_entries;
 
@@ -6792,6 +6810,7 @@ static int ipr_ioafp_query_ioa_cfg(struct ipr_cmnd *ipr_cmd)
        ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
 
        ioarcb->cmd_pkt.cdb[0] = IPR_QUERY_IOA_CONFIG;
+       ioarcb->cmd_pkt.cdb[6] = (ioa_cfg->cfg_table_size >> 16) & 0xff;
        ioarcb->cmd_pkt.cdb[7] = (ioa_cfg->cfg_table_size >> 8) & 0xff;
        ioarcb->cmd_pkt.cdb[8] = ioa_cfg->cfg_table_size & 0xff;
 
@@ -7122,7 +7141,9 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd)
        ipr_dbg("IPL stage = 0x%lx, IPL stage time = %ld\n", stage, stage_time);
 
        /* sanity check the stage_time value */
-       if (stage_time < IPR_IPL_INIT_MIN_STAGE_TIME)
+       if (stage_time == 0)
+               stage_time = IPR_IPL_INIT_DEFAULT_STAGE_TIME;
+       else if (stage_time < IPR_IPL_INIT_MIN_STAGE_TIME)
                stage_time = IPR_IPL_INIT_MIN_STAGE_TIME;
        else if (stage_time > IPR_LONG_OPERATIONAL_TIMEOUT)
                stage_time = IPR_LONG_OPERATIONAL_TIMEOUT;
@@ -7165,13 +7186,14 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        volatile u32 int_reg;
+       volatile u64 maskval;
 
        ENTER;
        ipr_cmd->job_step = ipr_ioafp_identify_hrrq;
        ipr_init_ioa_mem(ioa_cfg);
 
        ioa_cfg->allow_interrupts = 1;
-       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg);
+       int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
 
        if (int_reg & IPR_PCII_IOA_TRANS_TO_OPER) {
                writel((IPR_PCII_ERROR_INTERRUPTS | IPR_PCII_HRRQ_UPDATED),
@@ -7183,9 +7205,12 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
        /* Enable destructive diagnostics on IOA */
        writel(ioa_cfg->doorbell, ioa_cfg->regs.set_uproc_interrupt_reg32);
 
-       writel(IPR_PCII_OPER_INTERRUPTS, ioa_cfg->regs.clr_interrupt_mask_reg32);
-       if (ioa_cfg->sis64)
-               writel(IPR_PCII_IPL_STAGE_CHANGE, ioa_cfg->regs.clr_interrupt_mask_reg);
+       if (ioa_cfg->sis64) {
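+               /* clear the stage-change bit (upper 32 bits) and the operational interrupts (lower) in one 64-bit write */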
+               maskval = IPR_PCII_IPL_STAGE_CHANGE;
+               maskval = (maskval << 32) | IPR_PCII_OPER_INTERRUPTS;
+               writeq(maskval, ioa_cfg->regs.clr_interrupt_mask_reg);
+       } else
+               writel(IPR_PCII_OPER_INTERRUPTS, ioa_cfg->regs.clr_interrupt_mask_reg32);
 
        int_reg = readl(ioa_cfg->regs.sense_interrupt_mask_reg);
 
@@ -7332,12 +7357,12 @@ static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
        rc = pci_restore_state(ioa_cfg->pdev);
 
        if (rc != PCIBIOS_SUCCESSFUL) {
-               ipr_cmd->ioasa.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
+               ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
                return IPR_RC_JOB_CONTINUE;
        }
 
        if (ipr_set_pcix_cmd_reg(ioa_cfg)) {
-               ipr_cmd->ioasa.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
+               ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
                return IPR_RC_JOB_CONTINUE;
        }
 
@@ -7364,7 +7389,7 @@ static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
                }
        }
 
-       ENTER;
+       LEAVE;
        return IPR_RC_JOB_CONTINUE;
 }
 
@@ -7406,7 +7431,7 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd)
 
        if (rc != PCIBIOS_SUCCESSFUL) {
                pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
-               ipr_cmd->ioasa.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
+               ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
                rc = IPR_RC_JOB_CONTINUE;
        } else {
                ipr_cmd->job_step = ipr_reset_bist_done;
@@ -7665,7 +7690,7 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd)
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 
        do {
-               ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+               ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
                if (ioa_cfg->reset_cmd != ipr_cmd) {
                        /*
@@ -8048,13 +8073,13 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
                        ioarcb->u.sis64_addr_data.data_ioadl_addr =
                                cpu_to_be64(dma_addr + offsetof(struct ipr_cmnd, i.ioadl64));
                        ioarcb->u.sis64_addr_data.ioasa_host_pci_addr =
-                               cpu_to_be64(dma_addr + offsetof(struct ipr_cmnd, ioasa));
+                               cpu_to_be64(dma_addr + offsetof(struct ipr_cmnd, s.ioasa64));
                } else {
                        ioarcb->write_ioadl_addr =
                                cpu_to_be32(dma_addr + offsetof(struct ipr_cmnd, i.ioadl));
                        ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
                        ioarcb->ioasa_host_pci_addr =
-                               cpu_to_be32(dma_addr + offsetof(struct ipr_cmnd, ioasa));
+                               cpu_to_be32(dma_addr + offsetof(struct ipr_cmnd, s.ioasa));
                }
                ioarcb->ioasa_len = cpu_to_be16(sizeof(struct ipr_ioasa));
                ipr_cmd->cmd_index = i;
index 4c267b5..9ecd225 100644 (file)
 #define IPR_RUNTIME_RESET                              0x40000000
 
 #define IPR_IPL_INIT_MIN_STAGE_TIME                    5
+#define IPR_IPL_INIT_DEFAULT_STAGE_TIME                 15
 #define IPR_IPL_INIT_STAGE_UNKNOWN                     0x0
 #define IPR_IPL_INIT_STAGE_TRANSOP                     0xB0000000
 #define IPR_IPL_INIT_STAGE_MASK                                0xff000000
@@ -613,7 +614,7 @@ struct ipr_auto_sense {
        __be32 data[SCSI_SENSE_BUFFERSIZE/sizeof(__be32)];
 };
 
-struct ipr_ioasa {
+struct ipr_ioasa_hdr {
        __be32 ioasc;
 #define IPR_IOASC_SENSE_KEY(ioasc) ((ioasc) >> 24)
 #define IPR_IOASC_SENSE_CODE(ioasc) (((ioasc) & 0x00ff0000) >> 16)
@@ -645,6 +646,25 @@ struct ipr_ioasa {
 #define IPR_FIELD_POINTER_VALID                (0x80000000 >> 8)
 #define IPR_FIELD_POINTER_MASK         0x0000ffff
 
+}__attribute__((packed, aligned (4)));
+
+struct ipr_ioasa {
+       struct ipr_ioasa_hdr hdr;
+
+       union {
+               struct ipr_ioasa_vset vset;
+               struct ipr_ioasa_af_dasd dasd;
+               struct ipr_ioasa_gpdd gpdd;
+               struct ipr_ioasa_gata gata;
+       } u;
+
+       struct ipr_auto_sense auto_sense;
+}__attribute__((packed, aligned (4)));
+
+struct ipr_ioasa64 {
+       struct ipr_ioasa_hdr hdr;
+       u8 fd_res_path[8];
+
        union {
                struct ipr_ioasa_vset vset;
                struct ipr_ioasa_af_dasd dasd;
@@ -804,7 +824,7 @@ struct ipr_hostrcb_array_data_entry_enhanced {
 }__attribute__((packed, aligned (4)));
 
 struct ipr_hostrcb_type_ff_error {
-       __be32 ioa_data[502];
+       __be32 ioa_data[758];
 }__attribute__((packed, aligned (4)));
 
 struct ipr_hostrcb_type_01_error {
@@ -1181,7 +1201,7 @@ struct ipr_resource_entry {
        u8 flags;
        __be16 res_flags;
 
-       __be32 type;
+       u8 type;
 
        u8 qmodel;
        struct ipr_std_inq_data std_inq_data;
@@ -1464,7 +1484,10 @@ struct ipr_cmnd {
                struct ipr_ioadl64_desc ioadl64[IPR_NUM_IOADL_ENTRIES];
                struct ipr_ata64_ioadl ata_ioadl;
        } i;
-       struct ipr_ioasa ioasa;
+       union {
+               struct ipr_ioasa ioasa;
+               struct ipr_ioasa64 ioasa64;
+       } s;
        struct list_head queue;
        struct scsi_cmnd *scsi_cmd;
        struct ata_queued_cmd *qc;
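Both status formats begin with the shared ipr_ioasa_hdr, so wrapping them in a union lets a single ipr_cmnd serve 32- and 64-bit adapters, and offsetof() (as used in the ipr_alloc_cmd_blks hunk above) yields the DMA address of whichever member applies. A miniature model with simplified fields:

#include <stddef.h>
#include <stdio.h>

struct ioasa_hdr { unsigned int ioasc; };
struct ioasa32   { struct ioasa_hdr hdr; };
struct ioasa64   { struct ioasa_hdr hdr; unsigned char fd_res_path[8]; };

struct cmnd {
        union {
                struct ioasa32 ioasa;
                struct ioasa64 ioasa64;
        } s;
};

int main(void)
{
        /* both members overlay at the same offset, so code reading
         * s.ioasa.hdr.ioasc works whichever format was filled in */
        printf("ioasa at %zu, ioasa64 at %zu\n",
               offsetof(struct cmnd, s.ioasa),
               offsetof(struct cmnd, s.ioasa64));
        return 0;
}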
index bf55d30..fec47de 100644 (file)
@@ -601,10 +601,8 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
        write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
 
-       if (sk_sleep(sock->sk)) {
-               sock->sk->sk_err = EIO;
-               wake_up_interruptible(sk_sleep(sock->sk));
-       }
+       sock->sk->sk_err = EIO;
+       wake_up_interruptible(sk_sleep(sock->sk));
 
        iscsi_conn_stop(cls_conn, flag);
        iscsi_sw_tcp_release_conn(conn);
index 716d178..c29d0db 100644 (file)
 #include <linux/stat.h>
 
 
-static struct Scsi_Host *mvme147_host = NULL;
-
-static irqreturn_t mvme147_intr(int irq, void *dummy)
+static irqreturn_t mvme147_intr(int irq, void *data)
 {
+       struct Scsi_Host *instance = data;
+
        if (irq == MVME147_IRQ_SCSI_PORT)
-               wd33c93_intr(mvme147_host);
+               wd33c93_intr(instance);
        else
                m147_pcc->dma_intr = 0x89;      /* Ack and enable ints */
        return IRQ_HANDLED;
@@ -29,7 +29,8 @@ static irqreturn_t mvme147_intr(int irq, void *dummy)
 
 static int dma_setup(struct scsi_cmnd *cmd, int dir_in)
 {
-       struct WD33C93_hostdata *hdata = shost_priv(mvme147_host);
+       struct Scsi_Host *instance = cmd->device->host;
+       struct WD33C93_hostdata *hdata = shost_priv(instance);
        unsigned char flags = 0x01;
        unsigned long addr = virt_to_bus(cmd->SCp.ptr);
 
@@ -66,6 +67,7 @@ static void dma_stop(struct Scsi_Host *instance, struct scsi_cmnd *SCpnt,
 int mvme147_detect(struct scsi_host_template *tpnt)
 {
        static unsigned char called = 0;
+       struct Scsi_Host *instance;
        wd33c93_regs regs;
        struct WD33C93_hostdata *hdata;
 
@@ -76,25 +78,25 @@ int mvme147_detect(struct scsi_host_template *tpnt)
        tpnt->proc_name = "MVME147";
        tpnt->proc_info = &wd33c93_proc_info;
 
-       mvme147_host = scsi_register(tpnt, sizeof(struct WD33C93_hostdata));
-       if (!mvme147_host)
+       instance = scsi_register(tpnt, sizeof(struct WD33C93_hostdata));
+       if (!instance)
                goto err_out;
 
-       mvme147_host->base = 0xfffe4000;
-       mvme147_host->irq = MVME147_IRQ_SCSI_PORT;
+       instance->base = 0xfffe4000;
+       instance->irq = MVME147_IRQ_SCSI_PORT;
        regs.SASR = (volatile unsigned char *)0xfffe4000;
        regs.SCMD = (volatile unsigned char *)0xfffe4001;
-       hdata = shost_priv(mvme147_host);
+       hdata = shost_priv(instance);
        hdata->no_sync = 0xff;
        hdata->fast = 0;
        hdata->dma_mode = CTRL_DMA;
-       wd33c93_init(mvme147_host, regs, dma_setup, dma_stop, WD33C93_FS_8_10);
+       wd33c93_init(instance, regs, dma_setup, dma_stop, WD33C93_FS_8_10);
 
        if (request_irq(MVME147_IRQ_SCSI_PORT, mvme147_intr, 0,
-                       "MVME147 SCSI PORT", mvme147_intr))
+                       "MVME147 SCSI PORT", instance))
                goto err_unregister;
        if (request_irq(MVME147_IRQ_SCSI_DMA, mvme147_intr, 0,
-                       "MVME147 SCSI DMA", mvme147_intr))
+                       "MVME147 SCSI DMA", instance))
                goto err_free_irq;
 #if 0  /* Disabled; causes problems booting */
        m147_pcc->scsi_interrupt = 0x10;        /* Assert SCSI bus reset */
@@ -113,7 +115,7 @@ int mvme147_detect(struct scsi_host_template *tpnt)
 err_free_irq:
        free_irq(MVME147_IRQ_SCSI_PORT, mvme147_intr);
 err_unregister:
-       scsi_unregister(mvme147_host);
+       scsi_unregister(instance);
 err_out:
        return 0;
 }
@@ -132,9 +134,6 @@ static int mvme147_bus_reset(struct scsi_cmnd *cmd)
        return SUCCESS;
 }
 
-#define HOSTS_C
-
-#include "mvme147.h"
 
 static struct scsi_host_template driver_template = {
        .proc_name              = "MVME147",
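The file-scope mvme147_host global is gone: the Scsi_Host pointer now rides through request_irq()'s dev_id argument and comes back as the handler's data pointer, so a second instance would no longer clobber the first. A userspace analogue of that context-pointer pattern (struct and names invented for illustration):

#include <stdio.h>

struct host { int irq; const char *name; };

static void handler(int irq, void *data)
{
        struct host *instance = data;   /* was: a global pointer */

        printf("irq %d for %s\n", irq, instance->name);
}

int main(void)
{
        struct host a = { 12, "scsi0" }, b = { 13, "scsi1" };

        handler(a.irq, &a);
        handler(b.irq, &b);
        return 0;
}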
index 8dbf1c3..d64b717 100644 (file)
@@ -3587,7 +3587,7 @@ if (SRpnt) printk(KERN_ERR "%s:A: Not supposed to have SRpnt at line %d\n", name
                if (i == (-ENOSPC)) {
                        transfer = STp->buffer->writing;        /* FIXME -- check this logic */
                        if (transfer <= do_count) {
-                               filp->f_pos += do_count - transfer;
+                               *ppos += do_count - transfer;
                                count -= do_count - transfer;
                                if (STps->drv_block >= 0) {
                                        STps->drv_block += (do_count - transfer) / STp->block_size;
@@ -3625,7 +3625,7 @@ if (SRpnt) printk(KERN_ERR "%s:A: Not supposed to have SRpnt at line %d\n", name
                        goto out;
                }
 
-               filp->f_pos += do_count;
+               *ppos += do_count;
                b_point += do_count;
                count -= do_count;
                if (STps->drv_block >= 0) {
@@ -3647,7 +3647,7 @@ if (SRpnt) printk(KERN_ERR "%s:A: Not supposed to have SRpnt at line %d\n", name
                if (STps->drv_block >= 0) {
                        STps->drv_block += blks;
                }
-               filp->f_pos += count;
+               *ppos += count;
                count = 0;
        }
 
@@ -3823,7 +3823,7 @@ static ssize_t osst_read(struct file * filp, char __user * buf, size_t count, lo
                        }
                        STp->logical_blk_num += transfer / STp->block_size;
                        STps->drv_block      += transfer / STp->block_size;
-                       filp->f_pos          += transfer;
+                       *ppos                += transfer;
                        buf                  += transfer;
                        total                += transfer;
                }
@@ -5626,6 +5626,7 @@ static const struct file_operations osst_fops = {
        .open =         os_scsi_tape_open,
        .flush =        os_scsi_tape_flush,
        .release =      os_scsi_tape_close,
+       .llseek =       noop_llseek,
 };
 
 static int osst_supports(struct scsi_device * SDp)
index 9798c2c..1c027a9 100644 (file)
@@ -492,19 +492,20 @@ void scsi_target_reap(struct scsi_target *starget)
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
        unsigned long flags;
        enum scsi_target_state state;
-       int empty;
+       int empty = 0;
 
        spin_lock_irqsave(shost->host_lock, flags);
        state = starget->state;
-       empty = --starget->reap_ref == 0 &&
-               list_empty(&starget->devices) ? 1 : 0;
+       if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
+               empty = 1;
+               starget->state = STARGET_DEL;
+       }
        spin_unlock_irqrestore(shost->host_lock, flags);
 
        if (!empty)
                return;
 
        BUG_ON(state == STARGET_DEL);
-       starget->state = STARGET_DEL;
        if (state == STARGET_CREATED)
                scsi_target_destroy(starget);
        else
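The fix publishes STARGET_DEL inside the same locked section that decides "this was the last reference", closing the window where a second caller could still observe the old state. A compilable pthread sketch of the idea (states reduced to an int; build with -lpthread):

#include <pthread.h>
#include <stdio.h>

struct target {
        pthread_mutex_t lock;
        int ref;
        int state;                      /* 0 = RUNNING, 1 = DEL */
};

static int reap(struct target *t)
{
        int last = 0;

        pthread_mutex_lock(&t->lock);
        if (--t->ref == 0) {
                last = 1;
                t->state = 1;           /* was set after the unlock */
        }
        pthread_mutex_unlock(&t->lock);
        return last;
}

int main(void)
{
        struct target t = { PTHREAD_MUTEX_INITIALIZER, 2, 0 };
        int last;

        printf("first reap: last=%d\n", reap(&t));
        last = reap(&t);
        printf("second reap: last=%d state=%d\n", last, t.state);
        return 0;
}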
index 3ea1a71..24211d0 100644 (file)
@@ -3962,6 +3962,7 @@ static const struct file_operations st_fops =
        .open =         st_open,
        .flush =        st_flush,
        .release =      st_release,
+       .llseek =       noop_llseek,
 };
 
 static int st_probe(struct device *dev)
index 34aba30..f5b4ca5 100644 (file)
@@ -173,3 +173,44 @@ int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id,
        sfi_acpi_put_table(table);
        return ret;
 }
+
+static ssize_t sfi_acpi_table_show(struct file *filp, struct kobject *kobj,
+                              struct bin_attribute *bin_attr, char *buf,
+                              loff_t offset, size_t count)
+{
+       struct sfi_table_attr *tbl_attr =
+           container_of(bin_attr, struct sfi_table_attr, attr);
+       struct acpi_table_header *th = NULL;
+       struct sfi_table_key key;
+       ssize_t cnt;
+
+       key.sig = tbl_attr->name;
+       key.oem_id = NULL;
+       key.oem_table_id = NULL;
+
+       th = sfi_acpi_get_table(&key);
+       if (!th)
+               return 0;
+
+       cnt =  memory_read_from_buffer(buf, count, &offset,
+                                       th, th->length);
+       sfi_acpi_put_table(th);
+
+       return cnt;
+}
+
+
+void __init sfi_acpi_sysfs_init(void)
+{
+       u32 tbl_cnt, i;
+       struct sfi_table_attr *tbl_attr;
+
+       tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
+       for (i = 0; i < tbl_cnt; i++) {
+               tbl_attr =
+                       sfi_sysfs_install_table(xsdt_va->table_offset_entry[i]);
+               tbl_attr->attr.read = sfi_acpi_table_show;
+       }
+
+       return;
+}
index b204a09..0051959 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/sfi.h>
+#include <linux/slab.h>
 
 #include "sfi_core.h"
 
@@ -382,6 +383,102 @@ static __init int sfi_find_syst(void)
        return -1;
 }
 
+static struct kobject *sfi_kobj;
+static struct kobject *tables_kobj;
+
+static ssize_t sfi_table_show(struct file *filp, struct kobject *kobj,
+                              struct bin_attribute *bin_attr, char *buf,
+                              loff_t offset, size_t count)
+{
+       struct sfi_table_attr *tbl_attr =
+           container_of(bin_attr, struct sfi_table_attr, attr);
+       struct sfi_table_header *th = NULL;
+       struct sfi_table_key key;
+       ssize_t cnt;
+
+       key.sig = tbl_attr->name;
+       key.oem_id = NULL;
+       key.oem_table_id = NULL;
+
+       if (strncmp(SFI_SIG_SYST, tbl_attr->name, SFI_SIGNATURE_SIZE)) {
+               th = sfi_get_table(&key);
+               if (!th)
+                       return 0;
+
+               cnt =  memory_read_from_buffer(buf, count, &offset,
+                                               th, th->len);
+               sfi_put_table(th);
+       } else
+               cnt =  memory_read_from_buffer(buf, count, &offset,
+                                       syst_va, syst_va->header.len);
+
+       return cnt;
+}
+
+struct sfi_table_attr __init *sfi_sysfs_install_table(u64 pa)
+{
+       struct sfi_table_attr *tbl_attr;
+       struct sfi_table_header *th;
+       int ret;
+
+       tbl_attr = kzalloc(sizeof(struct sfi_table_attr), GFP_KERNEL);
+       if (!tbl_attr)
+               return NULL;
+
+       th = sfi_map_table(pa);
+       if (!th || !th->sig[0]) {
+               kfree(tbl_attr);
+               return NULL;
+       }
+
+       sysfs_attr_init(&tbl_attr->attr.attr);
+       memcpy(tbl_attr->name, th->sig, SFI_SIGNATURE_SIZE);
+
+       tbl_attr->attr.size = 0;
+       tbl_attr->attr.read = sfi_table_show;
+       tbl_attr->attr.attr.name = tbl_attr->name;
+       tbl_attr->attr.attr.mode = 0400;
+
+       ret = sysfs_create_bin_file(tables_kobj,
+                                 &tbl_attr->attr);
+       if (ret)
+               kfree(tbl_attr);
+
+       sfi_unmap_table(th);
+       return tbl_attr;
+}
+
+static int __init sfi_sysfs_init(void)
+{
+       int tbl_cnt, i;
+
+       if (sfi_disabled)
+               return 0;
+
+       sfi_kobj = kobject_create_and_add("sfi", firmware_kobj);
+       if (!sfi_kobj)
+               return 0;
+
+       tables_kobj = kobject_create_and_add("tables", sfi_kobj);
+       if (!tables_kobj) {
+               kobject_put(sfi_kobj);
+               return 0;
+       }
+
+       sfi_sysfs_install_table(syst_pa);
+
+       tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
+
+       for (i = 0; i < tbl_cnt; i++)
+               sfi_sysfs_install_table(syst_va->pentry[i]);
+
+       sfi_acpi_sysfs_init();
+       kobject_uevent(sfi_kobj, KOBJ_ADD);
+       kobject_uevent(tables_kobj, KOBJ_ADD);
+       pr_info("SFI sysfs interfaces initialized\n");
+       return 0;
+}
+
 void __init sfi_init(void)
 {
        if (!acpi_disabled)
@@ -390,7 +487,7 @@ void __init sfi_init(void)
        if (sfi_disabled)
                return;
 
-       pr_info("Simple Firmware Interface v0.7 http://simplefirmware.org\n");
+       pr_info("Simple Firmware Interface v0.81 http://simplefirmware.org\n");
 
        if (sfi_find_syst() || sfi_parse_syst() || sfi_platform_init())
                disable_sfi();
@@ -414,3 +511,9 @@ void __init sfi_init_late(void)
 
        sfi_acpi_init();
 }
+
+/*
+ * We register this via core_initcall() because we need to wait until
+ * /sys/firmware is set up; only then can our interface be registered
+ * in /sys/firmware/sfi.
+ */
+core_initcall(sfi_sysfs_init);
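Both show() routines lean on memory_read_from_buffer() to give the binary sysfs files ordinary read semantics. A userspace re-implementation of the contract assumed here (copy at most count bytes from *ppos, clamp at the end of the source, advance the position):

#include <stdio.h>
#include <string.h>

static long read_from_buffer(void *to, unsigned long count, long *ppos,
                             const void *from, unsigned long avail)
{
        long pos = *ppos;

        if (pos < 0 || (unsigned long)pos >= avail)
                return 0;
        if (count > avail - (unsigned long)pos)
                count = avail - (unsigned long)pos;
        memcpy(to, (const char *)from + pos, count);
        *ppos = pos + (long)count;
        return (long)count;
}

int main(void)
{
        const char table[] = "SYST....payload";        /* fake table image */
        char buf[8];
        long pos = 0, n;

        while ((n = read_from_buffer(buf, sizeof(buf), &pos,
                                     table, sizeof(table) - 1)) > 0)
                printf("read %ld bytes, pos now %ld\n", n, pos);
        return 0;
}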
index da82d39..b7cf220 100644 (file)
@@ -61,6 +61,12 @@ struct sfi_table_key{
        char    *oem_table_id;
 };
 
+/* sysfs interface */
+struct sfi_table_attr {
+       struct bin_attribute attr;
+       char name[8];
+};
+
 #define SFI_ANY_KEY { .sig = NULL, .oem_id = NULL, .oem_table_id = NULL }
 
 extern int __init sfi_acpi_init(void);
@@ -68,3 +74,5 @@ extern  struct sfi_table_header *sfi_check_table(u64 paddr,
                                        struct sfi_table_key *key);
 struct sfi_table_header *sfi_get_table(struct sfi_table_key *key);
 extern void sfi_put_table(struct sfi_table_header *table);
+extern struct sfi_table_attr __init *sfi_sysfs_install_table(u64 pa);
+extern void __init sfi_acpi_sysfs_init(void);
index 49f0d31..cf7c34a 100644 (file)
@@ -242,13 +242,13 @@ static void saa7134_go7007_irq_ts_done(struct saa7134_dev *dev,
                printk(KERN_DEBUG "saa7134-go7007: irq: lost %ld\n",
                                (status >> 16) & 0x0f);
        if (status & 0x100000) {
-               dma_sync_single(&dev->pci->dev,
-                               saa->bottom_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+               dma_sync_single_for_cpu(&dev->pci->dev,
+                                       saa->bottom_dma, PAGE_SIZE, DMA_FROM_DEVICE);
                go7007_parse_video_stream(go, saa->bottom, PAGE_SIZE);
                saa_writel(SAA7134_RS_BA2(5), cpu_to_le32(saa->bottom_dma));
        } else {
-               dma_sync_single(&dev->pci->dev,
-                               saa->top_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+               dma_sync_single_for_cpu(&dev->pci->dev,
+                                       saa->top_dma, PAGE_SIZE, DMA_FROM_DEVICE);
                go7007_parse_video_stream(go, saa->top, PAGE_SIZE);
                saa_writel(SAA7134_RS_BA1(5), cpu_to_le32(saa->top_dma));
        }
index e89304c..b53deee 100644 (file)
@@ -5879,20 +5879,13 @@ out:
 static int ixj_build_filter_cadence(IXJ *j, IXJ_FILTER_CADENCE __user * cp)
 {
        IXJ_FILTER_CADENCE *lcp;
-       lcp = kmalloc(sizeof(IXJ_FILTER_CADENCE), GFP_KERNEL);
-       if (lcp == NULL) {
+       lcp = memdup_user(cp, sizeof(IXJ_FILTER_CADENCE));
+       if (IS_ERR(lcp)) {
                if(ixjdebug & 0x0001) {
-                       printk(KERN_INFO "Could not allocate memory for cadence\n");
+                       printk(KERN_INFO "Could not allocate memory for cadence or could not copy cadence to kernel\n");
                }
-               return -ENOMEM;
+               return PTR_ERR(lcp);
         }
-       if (copy_from_user(lcp, cp, sizeof(IXJ_FILTER_CADENCE))) {
-               if(ixjdebug & 0x0001) {
-                       printk(KERN_INFO "Could not copy cadence to kernel\n");
-               }
-               kfree(lcp);
-               return -EFAULT;
-       }
        if (lcp->filter > 5) {
                if(ixjdebug & 0x0001) {
                        printk(KERN_INFO "Cadence out of range\n");
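memdup_user() folds the kmalloc/copy_from_user/kfree-on-failure dance into one call that returns either the buffer or an ERR_PTR, which is why the error branch above now returns PTR_ERR(lcp). A userspace sketch of the pattern, with malloc and memcpy standing in for the kernel primitives:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void *memdup(const void *src, size_t len, int *err)
{
        void *p = malloc(len);

        if (!p) {
                *err = -ENOMEM;         /* caller propagates this */
                return NULL;
        }
        memcpy(p, src, len);            /* stands in for copy_from_user() */
        *err = 0;
        return p;
}

int main(void)
{
        const char user_data[] = "cadence";
        int err;
        char *copy = memdup(user_data, sizeof(user_data), &err);

        if (!copy)
                return -err;
        printf("duplicated: %s\n", copy);
        free(copy);
        return 0;
}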
index 23b2a8c..b020ba7 100644 (file)
@@ -501,7 +501,9 @@ static irqreturn_t bfin_bf54x_irq_error(int irq, void *dev_id)
 
 static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
 {
+#ifndef NO_BL_SUPPORT
        struct backlight_properties props;
+#endif
        struct bfin_bf54xfb_info *info;
        struct fb_info *fbinfo;
        int ret;
@@ -654,7 +656,8 @@ static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
                printk(KERN_ERR DRIVER_NAME
                        ": unable to register backlight.\n");
                ret = -EINVAL;
-               goto out9;
+               unregister_framebuffer(fbinfo);
+               goto out8;
        }
 
        lcd_dev = lcd_device_register(DRIVER_NAME, &pdev->dev, NULL, &bfin_lcd_ops);
@@ -663,8 +666,6 @@ static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
 
        return 0;
 
-out9:
-       unregister_framebuffer(fbinfo);
 out8:
        free_irq(info->irq, info);
 out7:
index c2ec3dc..7a50272 100644 (file)
@@ -420,7 +420,9 @@ static irqreturn_t bfin_t350mcqb_irq_error(int irq, void *dev_id)
 
 static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev)
 {
+#ifndef NO_BL_SUPPORT
        struct backlight_properties props;
+#endif
        struct bfin_t350mcqbfb_info *info;
        struct fb_info *fbinfo;
        int ret;
@@ -550,7 +552,8 @@ static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev)
                printk(KERN_ERR DRIVER_NAME
                        ": unable to register backlight.\n");
                ret = -EINVAL;
-               goto out9;
+               unregister_framebuffer(fbinfo);
+               goto out8;
        }
 
        lcd_dev = lcd_device_register(DRIVER_NAME, NULL, &bfin_lcd_ops);
@@ -559,8 +562,6 @@ static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev)
 
        return 0;
 
-out9:
-       unregister_framebuffer(fbinfo);
 out8:
        free_irq(info->irq, info);
 out7:
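Both Blackfin hunks fix the same error path: backlight registration is not one of the numbered unwind steps, so on failure the framebuffer is unregistered explicitly and control joins the chain at the existing out8 label, retiring the dedicated out9. A toy probe() showing the shape:

#include <stdio.h>

static int step_ok(int fail) { return !fail; }

static int probe(int fail_backlight)
{
        /* ... earlier allocations would be the out1..out7 steps ... */
        if (!step_ok(0))
                goto out8;                      /* register_framebuffer */
        if (!step_ok(fail_backlight)) {
                puts("unregister framebuffer"); /* explicit undo, was out9 */
                goto out8;
        }
        return 0;
out8:
        puts("free irq and earlier resources");
        return -1;
}

int main(void)
{
        return probe(1) == -1 ? 0 : 1;
}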
index d4471b4..dce8c97 100644 (file)
@@ -71,7 +71,8 @@ static const char * const s3_names[] = {"S3 Unknown", "S3 Trio32", "S3 Trio64",
                        "S3 Trio64UV+", "S3 Trio64V2/DX", "S3 Trio64V2/GX",
                        "S3 Plato/PX", "S3 Aurora64VP", "S3 Virge",
                        "S3 Virge/VX", "S3 Virge/DX", "S3 Virge/GX",
-                       "S3 Virge/GX2", "S3 Virge/GX2P", "S3 Virge/GX2P"};
+                       "S3 Virge/GX2", "S3 Virge/GX2P", "S3 Virge/GX2P",
+                       "S3 Trio3D/1X", "S3 Trio3D/2X", "S3 Trio3D/2X"};
 
 #define CHIP_UNKNOWN           0x00
 #define CHIP_732_TRIO32                0x01
@@ -89,10 +90,14 @@ static const char * const s3_names[] = {"S3 Unknown", "S3 Trio32", "S3 Trio64",
 #define CHIP_356_VIRGE_GX2     0x0D
 #define CHIP_357_VIRGE_GX2P    0x0E
 #define CHIP_359_VIRGE_GX2P    0x0F
+#define CHIP_360_TRIO3D_1X     0x10
+#define CHIP_362_TRIO3D_2X     0x11
+#define CHIP_368_TRIO3D_2X     0x12
 
 #define CHIP_XXX_TRIO          0x80
 #define CHIP_XXX_TRIO64V2_DXGX 0x81
 #define CHIP_XXX_VIRGE_DXGX    0x82
+#define CHIP_36X_TRIO3D_1X_2X  0x83
 
 #define CHIP_UNDECIDED_FLAG    0x80
 #define CHIP_MASK              0xFF
@@ -324,6 +329,7 @@ static void s3fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 
 static void s3_set_pixclock(struct fb_info *info, u32 pixclock)
 {
+       struct s3fb_info *par = info->par;
        u16 m, n, r;
        u8 regval;
        int rv;
@@ -339,7 +345,13 @@ static void s3_set_pixclock(struct fb_info *info, u32 pixclock)
        vga_w(NULL, VGA_MIS_W, regval | VGA_MIS_ENB_PLL_LOAD);
 
        /* Set S3 clock registers */
-       vga_wseq(NULL, 0x12, ((n - 2) | (r << 5)));
+       if (par->chip == CHIP_360_TRIO3D_1X ||
+           par->chip == CHIP_362_TRIO3D_2X ||
+           par->chip == CHIP_368_TRIO3D_2X) {
+               vga_wseq(NULL, 0x12, (n - 2) | ((r & 3) << 6)); /* n and two bits of r */
+               vga_wseq(NULL, 0x29, r >> 2); /* remaining highest bit of r */
+       } else
+               vga_wseq(NULL, 0x12, (n - 2) | (r << 5));
        vga_wseq(NULL, 0x13, m - 2);
 
        udelay(1000);
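On the Trio3D chips the PLL divider r needs three bits, so it no longer fits next to n in SR12: the low two bits of r share SR12 and the top bit moves to SR29, exactly as the branch above writes them. A standalone check of that packing (divider values invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned int n = 5, r = 5;      /* r = 0b101 needs three bits */
        uint8_t sr12 = (uint8_t)((n - 2) | ((r & 3) << 6));
        uint8_t sr29 = (uint8_t)(r >> 2);

        printf("SR12=0x%02x SR29=0x%02x\n", sr12, sr29);
        return 0;
}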
@@ -456,7 +468,7 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 static int s3fb_set_par(struct fb_info *info)
 {
        struct s3fb_info *par = info->par;
-       u32 value, mode, hmul, offset_value, screen_size, multiplex;
+       u32 value, mode, hmul, offset_value, screen_size, multiplex, dbytes;
        u32 bpp = info->var.bits_per_pixel;
 
        if (bpp != 0) {
@@ -518,7 +530,7 @@ static int s3fb_set_par(struct fb_info *info)
        svga_wcrt_mask(0x33, 0x00, 0x08); /* no DDR ?   */
        svga_wcrt_mask(0x43, 0x00, 0x01); /* no DDR ?   */
 
-       svga_wcrt_mask(0x5D, 0x00, 0x28); // Clear strange HSlen bits
+       svga_wcrt_mask(0x5D, 0x00, 0x28); /* Clear strange HSlen bits */
 
 /*     svga_wcrt_mask(0x58, 0x03, 0x03); */
 
@@ -530,10 +542,14 @@ static int s3fb_set_par(struct fb_info *info)
        pr_debug("fb%d: offset register       : %d\n", info->node, offset_value);
        svga_wcrt_multi(s3_offset_regs, offset_value);
 
-       vga_wcrt(NULL, 0x54, 0x18); /* M parameter */
-       vga_wcrt(NULL, 0x60, 0xff); /* N parameter */
-       vga_wcrt(NULL, 0x61, 0xff); /* L parameter */
-       vga_wcrt(NULL, 0x62, 0xff); /* L parameter */
+       if (par->chip != CHIP_360_TRIO3D_1X &&
+           par->chip != CHIP_362_TRIO3D_2X &&
+           par->chip != CHIP_368_TRIO3D_2X) {
+               vga_wcrt(NULL, 0x54, 0x18); /* M parameter */
+               vga_wcrt(NULL, 0x60, 0xff); /* N parameter */
+               vga_wcrt(NULL, 0x61, 0xff); /* L parameter */
+               vga_wcrt(NULL, 0x62, 0xff); /* L parameter */
+       }
 
        vga_wcrt(NULL, 0x3A, 0x35);
        svga_wattr(0x33, 0x00);
@@ -570,6 +586,16 @@ static int s3fb_set_par(struct fb_info *info)
                vga_wcrt(NULL, 0x66, 0x90);
        }
 
+       if (par->chip == CHIP_360_TRIO3D_1X ||
+           par->chip == CHIP_362_TRIO3D_2X ||
+           par->chip == CHIP_368_TRIO3D_2X) {
+               dbytes = info->var.xres * ((bpp+7)/8);
+               vga_wcrt(NULL, 0x91, (dbytes + 7) / 8);
+               vga_wcrt(NULL, 0x90, (((dbytes + 7) / 8) >> 8) | 0x80);
+
+               vga_wcrt(NULL, 0x66, 0x81);
+       }
+
        svga_wcrt_mask(0x31, 0x00, 0x40);
        multiplex = 0;
        hmul = 1;
@@ -615,11 +641,13 @@ static int s3fb_set_par(struct fb_info *info)
                break;
        case 3:
                pr_debug("fb%d: 8 bit pseudocolor\n", info->node);
-               if (info->var.pixclock > 20000) {
-                       svga_wcrt_mask(0x50, 0x00, 0x30);
+               svga_wcrt_mask(0x50, 0x00, 0x30);
+               if (info->var.pixclock > 20000 ||
+                   par->chip == CHIP_360_TRIO3D_1X ||
+                   par->chip == CHIP_362_TRIO3D_2X ||
+                   par->chip == CHIP_368_TRIO3D_2X)
                        svga_wcrt_mask(0x67, 0x00, 0xF0);
-               } else {
-                       svga_wcrt_mask(0x50, 0x00, 0x30);
+               else {
                        svga_wcrt_mask(0x67, 0x10, 0xF0);
                        multiplex = 1;
                }
@@ -634,7 +662,10 @@ static int s3fb_set_par(struct fb_info *info)
                } else {
                        svga_wcrt_mask(0x50, 0x10, 0x30);
                        svga_wcrt_mask(0x67, 0x30, 0xF0);
-                       hmul = 2;
+                       if (par->chip != CHIP_360_TRIO3D_1X &&
+                           par->chip != CHIP_362_TRIO3D_2X &&
+                           par->chip != CHIP_368_TRIO3D_2X)
+                               hmul = 2;
                }
                break;
        case 5:
@@ -647,7 +678,10 @@ static int s3fb_set_par(struct fb_info *info)
                } else {
                        svga_wcrt_mask(0x50, 0x10, 0x30);
                        svga_wcrt_mask(0x67, 0x50, 0xF0);
-                       hmul = 2;
+                       if (par->chip != CHIP_360_TRIO3D_1X &&
+                           par->chip != CHIP_362_TRIO3D_2X &&
+                           par->chip != CHIP_368_TRIO3D_2X)
+                               hmul = 2;
                }
                break;
        case 6:
@@ -866,6 +900,17 @@ static int __devinit s3_identification(int chip)
                        return CHIP_385_VIRGE_GX;
        }
 
+       if (chip == CHIP_36X_TRIO3D_1X_2X) {
+               switch (vga_rcrt(NULL, 0x2f)) {
+               case 0x00:
+                       return CHIP_360_TRIO3D_1X;
+               case 0x01:
+                       return CHIP_362_TRIO3D_2X;
+               case 0x02:
+                       return CHIP_368_TRIO3D_2X;
+               }
+       }
+
        return CHIP_UNKNOWN;
 }
 
@@ -930,17 +975,32 @@ static int __devinit s3_pci_probe(struct pci_dev *dev, const struct pci_device_i
        vga_wcrt(NULL, 0x38, 0x48);
        vga_wcrt(NULL, 0x39, 0xA5);
 
-       /* Find how many physical memory there is on card */
-       /* 0x36 register is accessible even if other registers are locked */
-       regval = vga_rcrt(NULL, 0x36);
-       info->screen_size = s3_memsizes[regval >> 5] << 10;
-       info->fix.smem_len = info->screen_size;
-
+       /* Identify chip type */
        par->chip = id->driver_data & CHIP_MASK;
        par->rev = vga_rcrt(NULL, 0x2f);
        if (par->chip & CHIP_UNDECIDED_FLAG)
                par->chip = s3_identification(par->chip);
 
+       /* Find out how much physical memory there is on the card */
+       /* the 0x36 register is accessible even if other registers are locked */
+       regval = vga_rcrt(NULL, 0x36);
+       if (par->chip == CHIP_360_TRIO3D_1X ||
+           par->chip == CHIP_362_TRIO3D_2X ||
+           par->chip == CHIP_368_TRIO3D_2X) {
+               switch ((regval & 0xE0) >> 5) {
+               case 0: /* 8MB -- only 4MB usable for display */
+               case 1: /* 4MB with 32-bit bus */
+               case 2: /* 4MB */
+                       info->screen_size = 4 << 20;
+                       break;
+               case 6: /* 2MB */
+                       info->screen_size = 2 << 20;
+                       break;
+               }
+       } else
+               info->screen_size = s3_memsizes[regval >> 5] << 10;
+       info->fix.smem_len = info->screen_size;
+
        /* Find MCLK frequency */
        regval = vga_rseq(NULL, 0x10);
        par->mclk_freq = ((vga_rseq(NULL, 0x11) + 2) * 14318) / ((regval & 0x1F)  + 2);
@@ -1131,6 +1191,7 @@ static struct pci_device_id s3_devices[] __devinitdata = {
        {PCI_DEVICE(PCI_VENDOR_ID_S3, 0x8A10), .driver_data = CHIP_356_VIRGE_GX2},
        {PCI_DEVICE(PCI_VENDOR_ID_S3, 0x8A11), .driver_data = CHIP_357_VIRGE_GX2P},
        {PCI_DEVICE(PCI_VENDOR_ID_S3, 0x8A12), .driver_data = CHIP_359_VIRGE_GX2P},
+       {PCI_DEVICE(PCI_VENDOR_ID_S3, 0x8A13), .driver_data = CHIP_36X_TRIO3D_1X_2X},
 
        {0, 0, 0, 0, 0, 0, 0}
 };
index 2bc40e6..1082541 100644 (file)
@@ -578,14 +578,9 @@ static int viafb_ioctl(struct fb_info *info, u_int cmd, u_long arg)
                break;
 
        case VIAFB_SET_GAMMA_LUT:
-               viafb_gamma_table = kmalloc(256 * sizeof(u32), GFP_KERNEL);
-               if (!viafb_gamma_table)
-                       return -ENOMEM;
-               if (copy_from_user(viafb_gamma_table, argp,
-                               256 * sizeof(u32))) {
-                       kfree(viafb_gamma_table);
-                       return -EFAULT;
-               }
+               viafb_gamma_table = memdup_user(argp, 256 * sizeof(u32));
+               if (IS_ERR(viafb_gamma_table))
+                       return PTR_ERR(viafb_gamma_table);
                viafb_set_gamma_table(viafb_bpp, viafb_gamma_table);
                kfree(viafb_gamma_table);
                break;
index d70bbba..914d1c0 100644 (file)
@@ -224,7 +224,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
                affs_brelse(bh);
                inode = affs_iget(sb, ino);
                if (IS_ERR(inode))
-                       return ERR_PTR(PTR_ERR(inode));
+                       return ERR_CAST(inode);
        }
        dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
        d_add(dentry, inode);
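ERR_CAST(inode) expresses the same thing as ERR_PTR(PTR_ERR(inode)) without the pointless round trip through a long. A minimal userspace model of the three helpers, only to show they all carry the same encoded errno:

#include <stdio.h>

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline void *ERR_CAST(const void *ptr) { return (void *)ptr; }

int main(void)
{
        void *inode = ERR_PTR(-2);      /* pretend affs_iget() failed */

        printf("round trip: %ld, cast: %ld\n",
               PTR_ERR(ERR_PTR(PTR_ERR(inode))), PTR_ERR(ERR_CAST(inode)));
        return 0;
}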
index 1cf12b3..48fdeeb 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -36,6 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/mempool.h>
 #include <linux/hash.h>
+#include <linux/compat.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -1384,13 +1385,22 @@ static ssize_t aio_fsync(struct kiocb *iocb)
        return ret;
 }
 
-static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb)
+static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
 {
        ssize_t ret;
 
-       ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf,
-                                   kiocb->ki_nbytes, 1,
-                                   &kiocb->ki_inline_vec, &kiocb->ki_iovec);
+#ifdef CONFIG_COMPAT
+       if (compat)
+               ret = compat_rw_copy_check_uvector(type,
+                               (struct compat_iovec __user *)kiocb->ki_buf,
+                               kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+                               &kiocb->ki_iovec);
+       else
+#endif
+               ret = rw_copy_check_uvector(type,
+                               (struct iovec __user *)kiocb->ki_buf,
+                               kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+                               &kiocb->ki_iovec);
        if (ret < 0)
                goto out;
 
@@ -1420,7 +1430,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
  *     Performs the initial checks and aio retry method
  *     setup for the kiocb at the time of io submission.
  */
-static ssize_t aio_setup_iocb(struct kiocb *kiocb)
+static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
 {
        struct file *file = kiocb->ki_filp;
        ssize_t ret = 0;
@@ -1469,7 +1479,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
                ret = security_file_permission(file, MAY_READ);
                if (unlikely(ret))
                        break;
-               ret = aio_setup_vectored_rw(READ, kiocb);
+               ret = aio_setup_vectored_rw(READ, kiocb, compat);
                if (ret)
                        break;
                ret = -EINVAL;
@@ -1483,7 +1493,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
                ret = security_file_permission(file, MAY_WRITE);
                if (unlikely(ret))
                        break;
-               ret = aio_setup_vectored_rw(WRITE, kiocb);
+               ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
                if (ret)
                        break;
                ret = -EINVAL;
@@ -1548,7 +1558,8 @@ static void aio_batch_free(struct hlist_head *batch_hash)
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-                        struct iocb *iocb, struct hlist_head *batch_hash)
+                        struct iocb *iocb, struct hlist_head *batch_hash,
+                        bool compat)
 {
        struct kiocb *req;
        struct file *file;
@@ -1609,7 +1620,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
        req->ki_opcode = iocb->aio_lio_opcode;
 
-       ret = aio_setup_iocb(req);
+       ret = aio_setup_iocb(req, compat);
 
        if (ret)
                goto out_put_req;
@@ -1637,20 +1648,8 @@ out_put_req:
        return ret;
 }
 
-/* sys_io_submit:
- *     Queue the nr iocbs pointed to by iocbpp for processing.  Returns
- *     the number of iocbs queued.  May return -EINVAL if the aio_context
- *     specified by ctx_id is invalid, if nr is < 0, if the iocb at
- *     *iocbpp[0] is not properly initialized, if the operation specified
- *     is invalid for the file descriptor in the iocb.  May fail with
- *     -EFAULT if any of the data structures point to invalid data.  May
- *     fail with -EBADF if the file descriptor specified in the first
- *     iocb is invalid.  May fail with -EAGAIN if insufficient resources
- *     are available to queue any iocbs.  Will return 0 if nr is 0.  Will
- *     fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
-               struct iocb __user * __user *, iocbpp)
+long do_io_submit(aio_context_t ctx_id, long nr,
+                 struct iocb __user *__user *iocbpp, bool compat)
 {
        struct kioctx *ctx;
        long ret = 0;
@@ -1687,7 +1686,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
                        break;
                }
 
-               ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
+               ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat);
                if (ret)
                        break;
        }
@@ -1697,6 +1696,24 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
        return i ? i : ret;
 }
 
+/* sys_io_submit:
+ *     Queue the nr iocbs pointed to by iocbpp for processing.  Returns
+ *     the number of iocbs queued.  May return -EINVAL if the aio_context
+ *     specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ *     *iocbpp[0] is not properly initialized, if the operation specified
+ *     is invalid for the file descriptor in the iocb.  May fail with
+ *     -EFAULT if any of the data structures point to invalid data.  May
+ *     fail with -EBADF if the file descriptor specified in the first
+ *     iocb is invalid.  May fail with -EAGAIN if insufficient resources
+ *     are available to queue any iocbs.  Will return 0 if nr is 0.  Will
+ *     fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+               struct iocb __user * __user *, iocbpp)
+{
+       return do_io_submit(ctx_id, nr, iocbpp, 0);
+}
+
 /* lookup_kiocb
  *     Finds a given iocb for cancellation.
  */
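The refactoring threads a compat flag through one shared worker so a 32-bit compat entry point can reuse the whole submission path instead of duplicating it. A sketch of that wrapper shape (function names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

static long do_submit(long nr, bool compat)
{
        printf("submitting %ld iocbs (%s iovec layout)\n",
               nr, compat ? "compat" : "native");
        return nr;
}

static long sys_submit(long nr)         { return do_submit(nr, false); }
static long compat_sys_submit(long nr)  { return do_submit(nr, true); }

int main(void)
{
        sys_submit(2);
        compat_sys_submit(3);
        return 0;
}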
index 8713c7c..9a0520b 100644 (file)
@@ -28,6 +28,7 @@ static int autofs_root_mkdir(struct inode *,struct dentry *,int);
 static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
 
 const struct file_operations autofs_root_operations = {
+       .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = autofs_root_readdir,
        .ioctl          = autofs_root_ioctl,
index d832062..ba4a38b 100644 (file)
@@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
  */
 static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
 {
-       struct autofs_dev_ioctl tmp, *ads;
+       struct autofs_dev_ioctl tmp;
 
        if (copy_from_user(&tmp, in, sizeof(tmp)))
                return ERR_PTR(-EFAULT);
@@ -103,16 +103,7 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
        if (tmp.size < sizeof(tmp))
                return ERR_PTR(-EINVAL);
 
-       ads = kmalloc(tmp.size, GFP_KERNEL);
-       if (!ads)
-               return ERR_PTR(-ENOMEM);
-
-       if (copy_from_user(ads, in, tmp.size)) {
-               kfree(ads);
-               return ERR_PTR(-EFAULT);
-       }
-
-       return ads;
+       return memdup_user(in, tmp.size);
 }
 
 static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
index 462859a..7ec1409 100644 (file)
@@ -377,6 +377,7 @@ again:
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending)) {
                                        spin_unlock_irq(&worker->lock);
+                                       set_current_state(TASK_RUNNING);
                                        goto again;
                                }
 
index 7a4dee1..6ad63f1 100644 (file)
@@ -137,8 +137,8 @@ struct btrfs_inode {
         * of extent items we've reserved metadata for.
         */
        spinlock_t accounting_lock;
+       atomic_t outstanding_extents;
        int reserved_extents;
-       int outstanding_extents;
 
        /*
         * ordered_data_close is set by truncate when a file that used
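outstanding_extents becomes an atomic_t so it can be adjusted without holding accounting_lock. A userspace sketch with C11 atomics standing in for the kernel's atomic_t API:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
        atomic_int outstanding_extents = 0;

        atomic_fetch_add(&outstanding_extents, 1);      /* ~ atomic_inc() */
        atomic_fetch_sub(&outstanding_extents, 1);      /* ~ atomic_dec() */
        printf("outstanding: %d\n", atomic_load(&outstanding_extents));
        return 0;
}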
@@ -151,6 +151,7 @@ struct btrfs_inode {
         * of these.
         */
        unsigned ordered_data_close:1;
+       unsigned orphan_meta_reserved:1;
        unsigned dummy_inode:1;
 
        /*
index 6795a71..0d1d966 100644 (file)
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root,
                                       struct extent_buffer *buf,
-                                      struct extent_buffer *cow)
+                                      struct extent_buffer *cow,
+                                      int *last_ref)
 {
        u64 refs;
        u64 owner;
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                }
                clean_tree_block(trans, root, buf);
+               *last_ref = 1;
        }
        return 0;
 }
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        struct btrfs_disk_key disk_key;
        struct extent_buffer *cow;
        int level;
+       int last_ref = 0;
        int unlock_orig = 0;
        u64 parent_start;
 
@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
-       update_ref_for_cow(trans, root, buf, cow);
+       update_ref_for_cow(trans, root, buf, cow, &last_ref);
+
+       if (root->ref_cows)
+               btrfs_reloc_cow_block(trans, root, buf, cow);
 
        if (buf == root->node) {
                WARN_ON(parent && parent != buf);
@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                extent_buffer_get(cow);
                spin_unlock(&root->node_lock);
 
-               btrfs_free_tree_block(trans, root, buf->start, buf->len,
-                               parent_start, root->root_key.objectid, level);
+               btrfs_free_tree_block(trans, root, buf, parent_start,
+                                     last_ref);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
-               btrfs_free_tree_block(trans, root, buf->start, buf->len,
-                               parent_start, root->root_key.objectid, level);
+               btrfs_free_tree_block(trans, root, buf, parent_start,
+                                     last_ref);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
        return bin_search(eb, key, level, slot);
 }
 
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+       spin_lock(&root->accounting_lock);
+       btrfs_set_root_used(&root->root_item,
+                           btrfs_root_used(&root->root_item) + size);
+       spin_unlock(&root->accounting_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+       spin_lock(&root->accounting_lock);
+       btrfs_set_root_used(&root->root_item,
+                           btrfs_root_used(&root->root_item) - size);
+       spin_unlock(&root->accounting_lock);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                btrfs_tree_lock(child);
                btrfs_set_lock_blocking(child);
                ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
-               BUG_ON(ret);
+               if (ret) {
+                       btrfs_tree_unlock(child);
+                       free_extent_buffer(child);
+                       goto enospc;
+               }
 
                spin_lock(&root->node_lock);
                root->node = child;
@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
-               ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
-                                           0, root->root_key.objectid, level);
+
+               root_sub_used(root, mid->len);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
                /* once for the root ptr */
                free_extent_buffer(mid);
-               return ret;
+               return 0;
        }
        if (btrfs_header_nritems(mid) >
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       u64 bytenr = right->start;
-                       u32 blocksize = right->len;
-
                        clean_tree_block(trans, root, right);
                        btrfs_tree_unlock(right);
-                       free_extent_buffer(right);
-                       right = NULL;
                        wret = del_ptr(trans, root, path, level + 1, pslot +
                                       1);
                        if (wret)
                                ret = wret;
-                       wret = btrfs_free_tree_block(trans, root,
-                                                    bytenr, blocksize, 0,
-                                                    root->root_key.objectid,
-                                                    level);
-                       if (wret)
-                               ret = wret;
+                       root_sub_used(root, right->len);
+                       btrfs_free_tree_block(trans, root, right, 0, 1);
+                       free_extent_buffer(right);
+                       right = NULL;
                } else {
                        struct btrfs_disk_key right_key;
                        btrfs_node_key(right, &right_key, 0);
@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               /* we've managed to empty the middle node, drop it */
-               u64 bytenr = mid->start;
-               u32 blocksize = mid->len;
-
                clean_tree_block(trans, root, mid);
                btrfs_tree_unlock(mid);
-               free_extent_buffer(mid);
-               mid = NULL;
                wret = del_ptr(trans, root, path, level + 1, pslot);
                if (wret)
                        ret = wret;
-               wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
-                                        0, root->root_key.objectid, level);
-               if (wret)
-                       ret = wret;
+               root_sub_used(root, mid->len);
+               btrfs_free_tree_block(trans, root, mid, 0, 1);
+               free_extent_buffer(mid);
+               mid = NULL;
        } else {
                /* update the parent key to reflect our changes */
                struct btrfs_disk_key mid_key;
@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        btrfs_release_path(NULL, p);
 
        ret = -EAGAIN;
-       tmp = read_tree_block(root, blocknr, blocksize, gen);
+       tmp = read_tree_block(root, blocknr, blocksize, 0);
        if (tmp) {
                /*
                 * If the read above didn't mark this buffer up to date,
@@ -1740,7 +1754,6 @@ again:
                                              p->nodes[level + 1],
                                              p->slots[level + 1], &b);
                        if (err) {
-                               free_extent_buffer(b);
                                ret = err;
                                goto done;
                        }
@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        if (IS_ERR(c))
                return PTR_ERR(c);
 
+       root_add_used(root, root->nodesize);
+
        memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_nritems(c, 1);
        btrfs_set_header_level(c, level);
@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
        int nritems;
 
        BUG_ON(!path->nodes[level]);
+       btrfs_assert_tree_locked(path->nodes[level]);
        lower = path->nodes[level];
        nritems = btrfs_header_nritems(lower);
        BUG_ON(slot > nritems);
@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        if (IS_ERR(split))
                return PTR_ERR(split);
 
+       root_add_used(root, root->nodesize);
+
        memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_level(split, btrfs_header_level(c));
        btrfs_set_header_bytenr(split, split->start);
@@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
+       else
+               clean_tree_block(trans, root, left);
+
        btrfs_mark_buffer_dirty(right);
 
        btrfs_item_key(right, &disk_key, 0);
@@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(left);
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
+       else
+               clean_tree_block(trans, root, right);
 
        btrfs_item_key(right, &disk_key, 0);
        wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] < push_items) {
                path->slots[0] += old_left_nritems;
-               if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, root, path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = left;
@@ -2932,10 +2953,10 @@ again:
        right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
                                        root->root_key.objectid,
                                        &disk_key, 0, l->start, 0);
-       if (IS_ERR(right)) {
-               BUG_ON(1);
+       if (IS_ERR(right))
                return PTR_ERR(right);
-       }
+
+       root_add_used(root, root->leafsize);
 
        memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(right, right->start);
@@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
 
        btrfs_set_path_blocking(path);
        ret = split_leaf(trans, root, &key, path, ins_len, 1);
-       BUG_ON(ret);
+       if (ret)
+               goto err;
 
        path->keep_locks = 0;
        btrfs_unlock_up_safe(path, 1);
@@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
         */
        btrfs_unlock_up_safe(path, 0);
 
-       ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
-                                   0, root->root_key.objectid, 0);
-       return ret;
+       root_sub_used(root, leaf->len);
+
+       btrfs_free_tree_block(trans, root, leaf, 0, 1);
+       return 0;
 }
 /*
  * delete the item at the leaf level in path.  If that empties
@@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                if (leaf == root->node) {
                        btrfs_set_header_level(leaf, 0);
                } else {
+                       btrfs_set_path_blocking(path);
+                       clean_tree_block(trans, root, leaf);
                        ret = btrfs_del_leaf(trans, root, path, leaf);
                        BUG_ON(ret);
                }
index 746a724..e9bf864 100644 (file)
@@ -34,6 +34,7 @@
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
+struct btrfs_pending_snapshot;
 extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
@@ -663,6 +664,7 @@ struct btrfs_csum_item {
 #define BTRFS_BLOCK_GROUP_RAID1    (1 << 4)
 #define BTRFS_BLOCK_GROUP_DUP     (1 << 5)
 #define BTRFS_BLOCK_GROUP_RAID10   (1 << 6)
+#define BTRFS_NR_RAID_TYPES       5
 
 struct btrfs_block_group_item {
        __le64 used;
@@ -674,42 +676,46 @@ struct btrfs_space_info {
        u64 flags;
 
        u64 total_bytes;        /* total bytes in the space */
-       u64 bytes_used;         /* total bytes used on disk */
+       u64 bytes_used;         /* total bytes used,
+                                  this doesn't take mirrors into account */
        u64 bytes_pinned;       /* total bytes pinned, will be freed when the
                                   transaction finishes */
        u64 bytes_reserved;     /* total bytes the allocator has reserved for
                                   current allocations */
        u64 bytes_readonly;     /* total bytes that are read only */
-       u64 bytes_super;        /* total bytes reserved for the super blocks */
-       u64 bytes_root;         /* the number of bytes needed to commit a
-                                  transaction */
+
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
-       u64 bytes_delalloc;     /* number of bytes currently reserved for
-                                  delayed allocation */
+       u64 disk_used;          /* total bytes used on disk */
 
        int full;               /* indicates that we cannot allocate any more
                                   chunks for this space */
        int force_alloc;        /* set if we need to force a chunk alloc for
                                   this space */
-       int force_delalloc;     /* make people start doing filemap_flush until
-                                  we're under a threshold */
 
        struct list_head list;
 
-       /* for controlling how we free up space for allocations */
-       wait_queue_head_t allocate_wait;
-       wait_queue_head_t flush_wait;
-       int allocating_chunk;
-       int flushing;
-
        /* for block groups in our same type */
-       struct list_head block_groups;
+       struct list_head block_groups[BTRFS_NR_RAID_TYPES];
        spinlock_t lock;
        struct rw_semaphore groups_sem;
        atomic_t caching_threads;
 };
 
+struct btrfs_block_rsv {
+       u64 size;
+       u64 reserved;
+       u64 freed[2];
+       struct btrfs_space_info *space_info;
+       struct list_head list;
+       spinlock_t lock;
+       atomic_t usage;
+       unsigned int priority:8;
+       unsigned int durable:1;
+       unsigned int refill_used:1;
+       unsigned int full:1;
+};
+
 /*
  * free clusters are used to claim free space in relatively large chunks,
  * allowing us to do less seeky writes.  They are used for all metadata
@@ -760,6 +766,7 @@ struct btrfs_block_group_cache {
        spinlock_t lock;
        u64 pinned;
        u64 reserved;
+       u64 reserved_pinned;
        u64 bytes_super;
        u64 flags;
        u64 sectorsize;
@@ -825,6 +832,22 @@ struct btrfs_fs_info {
        /* logical->physical extent mapping */
        struct btrfs_mapping_tree mapping_tree;
 
+       /* block reservation for extent, checksum and root tree */
+       struct btrfs_block_rsv global_block_rsv;
+       /* block reservation for delay allocation */
+       struct btrfs_block_rsv delalloc_block_rsv;
+       /* block reservation for metadata operations */
+       struct btrfs_block_rsv trans_block_rsv;
+       /* block reservation for chunk tree */
+       struct btrfs_block_rsv chunk_block_rsv;
+
+       struct btrfs_block_rsv empty_block_rsv;
+
+       /* list of block reservations that cross multiple transactions */
+       struct list_head durable_block_rsv_list;
+
+       struct mutex durable_block_rsv_mutex;
+
        u64 generation;
        u64 last_trans_committed;
 
@@ -927,7 +950,6 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
        struct btrfs_workers submit_workers;
-       struct btrfs_workers enospc_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
         * the cow mechanism and make them safe to write.  It happens
@@ -943,6 +965,7 @@ struct btrfs_fs_info {
        int do_barriers;
        int closing;
        int log_root_recovering;
+       int enospc_unlink;
 
        u64 total_pinned;
 
@@ -1012,6 +1035,9 @@ struct btrfs_root {
        struct completion kobj_unregister;
        struct mutex objectid_mutex;
 
+       spinlock_t accounting_lock;
+       struct btrfs_block_rsv *block_rsv;
+
        struct mutex log_mutex;
        wait_queue_head_t log_writer_wait;
        wait_queue_head_t log_commit_wait[2];
@@ -1043,7 +1069,6 @@ struct btrfs_root {
        int ref_cows;
        int track_dirty;
        int in_radix;
-       int clean_orphans;
 
        u64 defrag_trans_start;
        struct btrfs_key defrag_progress;
@@ -1057,8 +1082,11 @@ struct btrfs_root {
 
        struct list_head root_list;
 
-       spinlock_t list_lock;
+       spinlock_t orphan_lock;
        struct list_head orphan_list;
+       struct btrfs_block_rsv *orphan_block_rsv;
+       int orphan_item_inserted;
+       int orphan_cleanup_state;
 
        spinlock_t inode_lock;
        /* red-black tree that keeps track of in-memory inodes */
@@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 bytenr,
+                            u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
                     u64 bytenr, u64 num, int reserved);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
@@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        u64 parent, u64 root_objectid,
                                        struct btrfs_disk_key *key, int level,
                                        u64 hint, u64 empty_size);
-int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         u64 bytenr, u32 blocksize,
-                         u64 parent, u64 root_objectid, int level);
+void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          struct extent_buffer *buf,
+                          u64 parent, int last_ref);
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
                                            struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize,
@@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                           u64 size);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 group_start);
-int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
-                               struct btrfs_block_group_cache *group);
-
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
-
-int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items);
-int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items);
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
-                                         struct inode *inode, int num_items);
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
-                                       struct inode *inode, int num_items);
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
-                               u64 bytes);
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
-                                   struct inode *inode, u64 bytes);
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
-                                u64 bytes);
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
-                             u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
+int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               int num_items, int *retries);
+void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root);
+int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
+                                 struct inode *inode);
+void btrfs_orphan_release_metadata(struct inode *inode);
+int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending);
+int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
+void btrfs_free_block_rsv(struct btrfs_root *root,
+                         struct btrfs_block_rsv *rsv);
+void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_rsv *rsv);
+int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv,
+                       u64 num_bytes, int *retries);
+int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_block_rsv *block_rsv,
+                         u64 min_reserved, int min_factor);
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
+                           struct btrfs_block_rsv *dst_rsv,
+                           u64 num_bytes);
+void btrfs_block_rsv_release(struct btrfs_root *root,
+                            struct btrfs_block_rsv *block_rsv,
+                            u64 num_bytes);
+int btrfs_set_block_group_ro(struct btrfs_root *root,
+                            struct btrfs_block_group_cache *cache);
+int btrfs_set_block_group_rw(struct btrfs_root *root,
+                            struct btrfs_block_group_cache *cache);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
+int btrfs_drop_snapshot(struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv, int update_ref);
 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct extent_buffer *node,
@@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           const char *name, int name_len,
                           u64 inode_objectid, u64 ref_objectid, u64 *index);
+struct btrfs_inode_ref *
+btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_path *path,
+                       const char *name, int name_len,
+                       u64 inode_objectid, u64 ref_objectid, int mod);
 int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_path *path, u64 objectid);
@@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, u64 bytenr, u64 len);
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                          struct bio *bio, u32 *dst);
+int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
+                             struct bio *bio, u64 logical_offset, u32 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
@@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 void btrfs_orphan_cleanup(struct btrfs_root *root);
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending,
+                               u64 *bytes_to_reserve);
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending);
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t size);
 int btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_root *root);
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+                             u64 start, u64 num_bytes, u64 min_size,
+                             loff_t actual_len, u64 *alloc_hint);
 extern const struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
@@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root);
 int btrfs_recover_relocation(struct btrfs_root *root);
 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
+void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, struct extent_buffer *buf,
+                          struct extent_buffer *cow);
+void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
+                             struct btrfs_pending_snapshot *pending,
+                             u64 *bytes_to_reserve);
+void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+                             struct btrfs_pending_snapshot *pending);
 #endif
index 902ce50..e807b14 100644 (file)
@@ -319,107 +319,6 @@ out:
 }
 
 /*
- * helper function to lookup reference count and flags of extent.
- *
- * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree. the head
- * node may also store the extent flags to set. This way you can check
- * to see what the reference count and extent flags would be if all of
- * the delayed refs are not processed.
- */
-int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *root, u64 bytenr,
-                            u64 num_bytes, u64 *refs, u64 *flags)
-{
-       struct btrfs_delayed_ref_node *ref;
-       struct btrfs_delayed_ref_head *head;
-       struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_path *path;
-       struct btrfs_extent_item *ei;
-       struct extent_buffer *leaf;
-       struct btrfs_key key;
-       u32 item_size;
-       u64 num_refs;
-       u64 extent_flags;
-       int ret;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       key.objectid = bytenr;
-       key.type = BTRFS_EXTENT_ITEM_KEY;
-       key.offset = num_bytes;
-       delayed_refs = &trans->transaction->delayed_refs;
-again:
-       ret = btrfs_search_slot(trans, root->fs_info->extent_root,
-                               &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
-
-       if (ret == 0) {
-               leaf = path->nodes[0];
-               item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-               if (item_size >= sizeof(*ei)) {
-                       ei = btrfs_item_ptr(leaf, path->slots[0],
-                                           struct btrfs_extent_item);
-                       num_refs = btrfs_extent_refs(leaf, ei);
-                       extent_flags = btrfs_extent_flags(leaf, ei);
-               } else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-                       struct btrfs_extent_item_v0 *ei0;
-                       BUG_ON(item_size != sizeof(*ei0));
-                       ei0 = btrfs_item_ptr(leaf, path->slots[0],
-                                            struct btrfs_extent_item_v0);
-                       num_refs = btrfs_extent_refs_v0(leaf, ei0);
-                       /* FIXME: this isn't correct for data */
-                       extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
-#else
-                       BUG();
-#endif
-               }
-               BUG_ON(num_refs == 0);
-       } else {
-               num_refs = 0;
-               extent_flags = 0;
-               ret = 0;
-       }
-
-       spin_lock(&delayed_refs->lock);
-       ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
-       if (ref) {
-               head = btrfs_delayed_node_to_head(ref);
-               if (!mutex_trylock(&head->mutex)) {
-                       atomic_inc(&ref->refs);
-                       spin_unlock(&delayed_refs->lock);
-
-                       btrfs_release_path(root->fs_info->extent_root, path);
-
-                       mutex_lock(&head->mutex);
-                       mutex_unlock(&head->mutex);
-                       btrfs_put_delayed_ref(ref);
-                       goto again;
-               }
-               if (head->extent_op && head->extent_op->update_flags)
-                       extent_flags |= head->extent_op->flags_to_set;
-               else
-                       BUG_ON(num_refs == 0);
-
-               num_refs += ref->ref_mod;
-               mutex_unlock(&head->mutex);
-       }
-       WARN_ON(num_refs == 0);
-       if (refs)
-               *refs = num_refs;
-       if (flags)
-               *flags = extent_flags;
-out:
-       spin_unlock(&delayed_refs->lock);
-       btrfs_free_path(path);
-       return ret;
-}
-
-/*
  * helper function to update an extent delayed ref in the
  * rbtree.  existing and update must both have the same
  * bytenr and parent
index f6fc67d..50e3cf9 100644 (file)
@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
 int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
-int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *root, u64 bytenr,
-                            u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
                          u64 bytenr, u64 num_bytes, u64 orig_parent,
                          u64 parent, u64 orig_ref_root, u64 ref_root,
index feca041..f3b287c 100644 (file)
@@ -74,6 +74,11 @@ struct async_submit_bio {
        int rw;
        int mirror_num;
        unsigned long bio_flags;
+       /*
+        * bio_offset is optional; it is used when the pages in the bio
+        * can't tell us where in the file the bio should go
+        */
+       u64 bio_offset;
        struct btrfs_work work;
 };
 
@@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work)
        async = container_of(work, struct  async_submit_bio, work);
        fs_info = BTRFS_I(async->inode)->root->fs_info;
        async->submit_bio_start(async->inode, async->rw, async->bio,
-                              async->mirror_num, async->bio_flags);
+                              async->mirror_num, async->bio_flags,
+                              async->bio_offset);
 }
 
 static void run_one_async_done(struct btrfs_work *work)
@@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work)
                wake_up(&fs_info->async_submit_wait);
 
        async->submit_bio_done(async->inode, async->rw, async->bio,
-                              async->mirror_num, async->bio_flags);
+                              async->mirror_num, async->bio_flags,
+                              async->bio_offset);
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work)
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
                        int rw, struct bio *bio, int mirror_num,
                        unsigned long bio_flags,
+                       u64 bio_offset,
                        extent_submit_bio_hook_t *submit_bio_start,
                        extent_submit_bio_hook_t *submit_bio_done)
 {
@@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 
        async->work.flags = 0;
        async->bio_flags = bio_flags;
+       async->bio_offset = bio_offset;
 
        atomic_inc(&fs_info->nr_async_submits);
 
@@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio)
 
 static int __btree_submit_bio_start(struct inode *inode, int rw,
                                    struct bio *bio, int mirror_num,
-                                   unsigned long bio_flags)
+                                   unsigned long bio_flags,
+                                   u64 bio_offset)
 {
        /*
         * when we're called for a write, we're already in the async
@@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
 }
 
 static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
-                                int mirror_num, unsigned long bio_flags)
+                                int mirror_num, unsigned long bio_flags,
+                                u64 bio_offset)
 {
        /*
         * when we're called for a write, we're already in the async
@@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
 }
 
 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-                                int mirror_num, unsigned long bio_flags)
+                                int mirror_num, unsigned long bio_flags,
+                                u64 bio_offset)
 {
        int ret;
 
@@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
         */
        return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num, 0,
+                                  bio_offset,
                                   __btree_submit_bio_start,
                                   __btree_submit_bio_done);
 }
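
Every async submit hook now carries bio_offset. Judging by the btrfs_lookup_bio_sums_dio() prototype added in ctree.h, the motivation is direct I/O: dio pages are not in the page cache, so the file position has to travel with the bio rather than be derived from page->index. A hypothetical data-read hook (the function name and the WRITE test are illustrative, not from the patch):

    static int example_dio_submit_hook(struct inode *inode, int rw,
                                       struct bio *bio, int mirror_num,
                                       unsigned long bio_flags, u64 bio_offset)
    {
            struct btrfs_root *root = BTRFS_I(inode)->root;

            /* reads: locate checksums by the file offset carried in the
             * bio, since page->index is meaningless for dio pages */
            if (!(rw & WRITE))
                    return btrfs_lookup_bio_sums_dio(root, inode, bio,
                                                     bio_offset, NULL);
            return 0;
    }
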
@@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->ref_cows = 0;
        root->track_dirty = 0;
        root->in_radix = 0;
-       root->clean_orphans = 0;
+       root->orphan_item_inserted = 0;
+       root->orphan_cleanup_state = 0;
 
        root->fs_info = fs_info;
        root->objectid = objectid;
@@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->name = NULL;
        root->in_sysfs = 0;
        root->inode_tree = RB_ROOT;
+       root->block_rsv = NULL;
+       root->orphan_block_rsv = NULL;
 
        INIT_LIST_HEAD(&root->dirty_list);
        INIT_LIST_HEAD(&root->orphan_list);
        INIT_LIST_HEAD(&root->root_list);
        spin_lock_init(&root->node_lock);
-       spin_lock_init(&root->list_lock);
+       spin_lock_init(&root->orphan_lock);
        spin_lock_init(&root->inode_lock);
+       spin_lock_init(&root->accounting_lock);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
        init_waitqueue_head(&root->log_writer_wait);
@@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
        return 0;
 }
 
-int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
-                            struct btrfs_fs_info *fs_info)
-{
-       struct extent_buffer *eb;
-       struct btrfs_root *log_root_tree = fs_info->log_root_tree;
-       u64 start = 0;
-       u64 end = 0;
-       int ret;
-
-       if (!log_root_tree)
-               return 0;
-
-       while (1) {
-               ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
-                               0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
-               if (ret)
-                       break;
-
-               clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
-                                 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
-       }
-       eb = fs_info->log_root_tree->node;
-
-       WARN_ON(btrfs_header_level(eb) != 0);
-       WARN_ON(btrfs_header_nritems(eb) != 0);
-
-       ret = btrfs_free_reserved_extent(fs_info->tree_root,
-                               eb->start, eb->len);
-       BUG_ON(ret);
-
-       free_extent_buffer(eb);
-       kfree(fs_info->log_root_tree);
-       fs_info->log_root_tree = NULL;
-       return 0;
-}
-
 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info)
 {
@@ -1191,19 +1172,23 @@ again:
        if (root)
                return root;
 
-       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
-       if (ret == 0)
-               ret = -ENOENT;
-       if (ret < 0)
-               return ERR_PTR(ret);
-
        root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
        if (IS_ERR(root))
                return root;
 
-       WARN_ON(btrfs_root_refs(&root->root_item) == 0);
        set_anon_super(&root->anon_super, NULL);
 
+       if (btrfs_root_refs(&root->root_item) == 0) {
+               ret = -ENOENT;
+               goto fail;
+       }
+
+       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       if (ret < 0)
+               goto fail;
+       if (ret == 0)
+               root->orphan_item_inserted = 1;
+
        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
        if (ret)
                goto fail;
@@ -1212,10 +1197,9 @@ again:
        ret = radix_tree_insert(&fs_info->fs_roots_radix,
                                (unsigned long)root->root_key.objectid,
                                root);
-       if (ret == 0) {
+       if (ret == 0)
                root->in_radix = 1;
-               root->clean_orphans = 1;
-       }
+
        spin_unlock(&fs_info->fs_roots_radix_lock);
        radix_tree_preload_end();
        if (ret) {
@@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg)
        struct btrfs_root *root = arg;
 
        do {
-               smp_mb();
-               if (root->fs_info->closing)
-                       break;
-
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 
                if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
@@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg)
                if (freezing(current)) {
                        refrigerator();
                } else {
-                       smp_mb();
-                       if (root->fs_info->closing)
-                               break;
                        set_current_state(TASK_INTERRUPTIBLE);
-                       schedule();
+                       if (!kthread_should_stop())
+                               schedule();
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
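
The dropped fs_info->closing checks are replaced by the standard race-free kthread stop idiom: the task state is set to TASK_INTERRUPTIBLE before kthread_should_stop() is tested, so a kthread_stop() that lands between the test and schedule() leaves a wake-up pending and the thread cannot sleep through its own shutdown.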
@@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg)
        struct btrfs_root *root = arg;
        struct btrfs_trans_handle *trans;
        struct btrfs_transaction *cur;
+       u64 transid;
        unsigned long now;
        unsigned long delay;
        int ret;
 
        do {
-               smp_mb();
-               if (root->fs_info->closing)
-                       break;
-
                delay = HZ * 30;
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-               mutex_lock(&root->fs_info->trans_mutex);
+               spin_lock(&root->fs_info->new_trans_lock);
                cur = root->fs_info->running_transaction;
                if (!cur) {
-                       mutex_unlock(&root->fs_info->trans_mutex);
+                       spin_unlock(&root->fs_info->new_trans_lock);
                        goto sleep;
                }
 
                now = get_seconds();
-               if (now < cur->start_time || now - cur->start_time < 30) {
-                       mutex_unlock(&root->fs_info->trans_mutex);
+               if (!cur->blocked &&
+                   (now < cur->start_time || now - cur->start_time < 30)) {
+                       spin_unlock(&root->fs_info->new_trans_lock);
                        delay = HZ * 5;
                        goto sleep;
                }
-               mutex_unlock(&root->fs_info->trans_mutex);
-               trans = btrfs_start_transaction(root, 1);
-               ret = btrfs_commit_transaction(trans, root);
+               transid = cur->transid;
+               spin_unlock(&root->fs_info->new_trans_lock);
 
+               trans = btrfs_join_transaction(root, 1);
+               if (transid == trans->transid) {
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+               } else {
+                       btrfs_end_transaction(trans, root);
+               }
 sleep:
                wake_up_process(root->fs_info->cleaner_kthread);
                mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1530,10 +1512,10 @@ sleep:
                if (freezing(current)) {
                        refrigerator();
                } else {
-                       if (root->fs_info->closing)
-                               break;
                        set_current_state(TASK_INTERRUPTIBLE);
-                       schedule_timeout(delay);
+                       if (!kthread_should_stop() &&
+                           !btrfs_transaction_blocked(root->fs_info))
+                               schedule_timeout(delay);
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
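
transaction_kthread now snapshots the running transaction's id under new_trans_lock and commits through btrfs_join_transaction(): if some other task committed first, trans->transid no longer matches the snapshot and the handle is simply ended, which avoids issuing back-to-back commits of already-empty transactions.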
@@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        btrfs_mapping_init(&fs_info->mapping_tree);
+       btrfs_init_block_rsv(&fs_info->global_block_rsv);
+       btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
+       btrfs_init_block_rsv(&fs_info->trans_block_rsv);
+       btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
+       btrfs_init_block_rsv(&fs_info->empty_block_rsv);
+       INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
+       mutex_init(&fs_info->durable_block_rsv_mutex);
        atomic_set(&fs_info->nr_async_submits, 0);
        atomic_set(&fs_info->async_delalloc_pages, 0);
        atomic_set(&fs_info->async_submit_draining, 0);
@@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                           min_t(u64, fs_devices->num_devices,
                           fs_info->thread_pool_size),
                           &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->enospc_workers, "enospc",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
 
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be sent down in a sane order to the
@@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
-       btrfs_start_workers(&fs_info->enospc_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        csum_root->track_dirty = 1;
 
+       fs_info->generation = generation;
+       fs_info->last_trans_committed = generation;
+       fs_info->data_alloc_profile = (u64)-1;
+       fs_info->metadata_alloc_profile = (u64)-1;
+       fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
                printk(KERN_ERR "Failed to read block groups: %d\n", ret);
                goto fail_block_groups;
        }
 
-       fs_info->generation = generation;
-       fs_info->last_trans_committed = generation;
-       fs_info->data_alloc_profile = (u64)-1;
-       fs_info->metadata_alloc_profile = (u64)-1;
-       fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
@@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        BUG_ON(ret);
 
        if (!(sb->s_flags & MS_RDONLY)) {
+               ret = btrfs_cleanup_fs_roots(fs_info);
+               BUG_ON(ret);
+
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
                        printk(KERN_WARNING
@@ -2040,7 +2029,6 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
-       btrfs_stop_workers(&fs_info->enospc_workers);
 fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
@@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root)
        down_write(&root->fs_info->cleanup_work_sem);
        up_write(&root->fs_info->cleanup_work_sem);
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_join_transaction(root, 1);
        ret = btrfs_commit_transaction(trans, root);
        BUG_ON(ret);
        /* run commit again to drop the original snapshot */
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_join_transaction(root, 1);
        btrfs_commit_transaction(trans, root);
        ret = btrfs_write_and_wait_transaction(NULL, root);
        BUG_ON(ret);
@@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
-       kthread_stop(root->fs_info->transaction_kthread);
-       kthread_stop(root->fs_info->cleaner_kthread);
-
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret =  btrfs_commit_super(root);
                if (ret)
                        printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
        }
 
+       kthread_stop(root->fs_info->transaction_kthread);
+       kthread_stop(root->fs_info->cleaner_kthread);
+
        fs_info->closing = 2;
        smp_mb();
 
@@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
        btrfs_stop_workers(&fs_info->submit_workers);
-       btrfs_stop_workers(&fs_info->enospc_workers);
 
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
index c958ecb..88e825a 100644 (file)
@@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        int metadata);
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
                        int rw, struct bio *bio, int mirror_num,
-                       unsigned long bio_flags,
+                       unsigned long bio_flags, u64 bio_offset,
                        extent_submit_bio_hook_t *submit_bio_start,
                        extent_submit_bio_hook_t *submit_bio_done);
 
@@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
-int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
-                            struct btrfs_fs_info *fs_info);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info);
 int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
index c6a4f45..b9080d7 100644 (file)
 
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc,
-                             int mark_free);
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
-                                  u64 num_bytes, int reserve);
+                             u64 bytenr, u64 num_bytes, int alloc);
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         struct btrfs_path *path,
-                         u64 bytenr, u64 num_bytes,
-                         int is_data, int reserved,
-                         struct extent_buffer **must_clean);
 static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 {
-       if (atomic_dec_and_test(&cache->count))
+       if (atomic_dec_and_test(&cache->count)) {
+               WARN_ON(cache->pinned > 0);
+               WARN_ON(cache->reserved > 0);
+               WARN_ON(cache->reserved_pinned > 0);
                kfree(cache);
+       }
 }
 
 /*
@@ -319,7 +316,7 @@ static int caching_kthread(void *data)
 
        exclude_super_stripes(extent_root, block_group);
        spin_lock(&block_group->space_info->lock);
-       block_group->space_info->bytes_super += block_group->bytes_super;
+       block_group->space_info->bytes_readonly += block_group->bytes_super;
        spin_unlock(&block_group->space_info->lock);
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;
 
+       flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
+                BTRFS_BLOCK_GROUP_METADATA;
+
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags == flags) {
@@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 }
 
 /*
+ * helper function to lookup reference count and flags of extent.
+ *
+ * the head node for a delayed ref is used to store the sum of all the
+ * reference count modifications queued up in the rbtree.  the head
+ * node may also store the extent flags to set.  This way you can check
+ * what the reference count and extent flags will be once all of the
+ * queued delayed refs are processed, without actually running them.
+ */
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root, u64 bytenr,
+                            u64 num_bytes, u64 *refs, u64 *flags)
+{
+       struct btrfs_delayed_ref_head *head;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_path *path;
+       struct btrfs_extent_item *ei;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       u32 item_size;
+       u64 num_refs;
+       u64 extent_flags;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = bytenr;
+       key.type = BTRFS_EXTENT_ITEM_KEY;
+       key.offset = num_bytes;
+       if (!trans) {
+               path->skip_locking = 1;
+               path->search_commit_root = 1;
+       }
+again:
+       ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+                               &key, path, 0, 0);
+       if (ret < 0)
+               goto out_free;
+
+       if (ret == 0) {
+               leaf = path->nodes[0];
+               item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+               if (item_size >= sizeof(*ei)) {
+                       ei = btrfs_item_ptr(leaf, path->slots[0],
+                                           struct btrfs_extent_item);
+                       num_refs = btrfs_extent_refs(leaf, ei);
+                       extent_flags = btrfs_extent_flags(leaf, ei);
+               } else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+                       struct btrfs_extent_item_v0 *ei0;
+                       BUG_ON(item_size != sizeof(*ei0));
+                       ei0 = btrfs_item_ptr(leaf, path->slots[0],
+                                            struct btrfs_extent_item_v0);
+                       num_refs = btrfs_extent_refs_v0(leaf, ei0);
+                       /* FIXME: this isn't correct for data */
+                       extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+                       BUG();
+#endif
+               }
+               BUG_ON(num_refs == 0);
+       } else {
+               num_refs = 0;
+               extent_flags = 0;
+               ret = 0;
+       }
+
+       if (!trans)
+               goto out;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       spin_lock(&delayed_refs->lock);
+       head = btrfs_find_delayed_ref_head(trans, bytenr);
+       if (head) {
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
+
+                       btrfs_release_path(root->fs_info->extent_root, path);
+
+                       mutex_lock(&head->mutex);
+                       mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       goto again;
+               }
+               if (head->extent_op && head->extent_op->update_flags)
+                       extent_flags |= head->extent_op->flags_to_set;
+               else
+                       BUG_ON(num_refs == 0);
+
+               num_refs += head->node.ref_mod;
+               mutex_unlock(&head->mutex);
+       }
+       spin_unlock(&delayed_refs->lock);
+out:
+       WARN_ON(num_refs == 0);
+       if (refs)
+               *refs = num_refs;
+       if (flags)
+               *flags = extent_flags;
+out_free:
+       btrfs_free_path(path);
+       return ret;
+}
+
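
A sketch of how a caller might use the relocated helper. With trans == NULL the function now searches the commit root with locking skipped, so read-only callers no longer need a transaction handle; the example name and the "shared" interpretation of refs > 1 are illustrative:

    static int example_extent_is_shared(struct btrfs_root *root,
                                        u64 bytenr, u64 num_bytes)
    {
            u64 refs = 0;
            u64 flags = 0;
            int ret;

            /* trans == NULL: consistent view from the commit root */
            ret = btrfs_lookup_extent_info(NULL, root, bytenr, num_bytes,
                                           &refs, &flags);
            if (ret)
                    return ret;
            return refs > 1;
    }
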
+/*
  * Back reference rules.  Back refs have three main goals:
  *
  * 1) differentiate between all holders of references to an extent so that
@@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-
 /* helper function to actually process a single delayed ref entry */
 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
@@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                BUG_ON(extent_op);
                head = btrfs_delayed_node_to_head(node);
                if (insert_reserved) {
-                       int mark_free = 0;
-                       struct extent_buffer *must_clean = NULL;
-
-                       ret = pin_down_bytes(trans, root, NULL,
-                                            node->bytenr, node->num_bytes,
-                                            head->is_data, 1, &must_clean);
-                       if (ret > 0)
-                               mark_free = 1;
-
-                       if (must_clean) {
-                               clean_tree_block(NULL, root, must_clean);
-                               btrfs_tree_unlock(must_clean);
-                               free_extent_buffer(must_clean);
-                       }
+                       btrfs_pin_extent(root, node->bytenr,
+                                        node->num_bytes, 1);
                        if (head->is_data) {
                                ret = btrfs_del_csums(trans, root,
                                                      node->bytenr,
                                                      node->num_bytes);
                                BUG_ON(ret);
                        }
-                       if (mark_free) {
-                               ret = btrfs_free_reserved_extent(root,
-                                                       node->bytenr,
-                                                       node->num_bytes);
-                               BUG_ON(ret);
-                       }
                }
                mutex_unlock(&head->mutex);
                return 0;
@@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
                ret = 0;
 out:
        btrfs_free_path(path);
+       if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+               WARN_ON(ret > 0);
        return ret;
 }
 
@@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                             struct btrfs_space_info **space_info)
 {
        struct btrfs_space_info *found;
+       int i;
+       int factor;
+
+       if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+                    BTRFS_BLOCK_GROUP_RAID10))
+               factor = 2;
+       else
+               factor = 1;
 
        found = __find_space_info(info, flags);
        if (found) {
                spin_lock(&found->lock);
                found->total_bytes += total_bytes;
                found->bytes_used += bytes_used;
+               found->disk_used += bytes_used * factor;
                found->full = 0;
                spin_unlock(&found->lock);
                *space_info = found;
@@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        if (!found)
                return -ENOMEM;
 
-       INIT_LIST_HEAD(&found->block_groups);
+       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+               INIT_LIST_HEAD(&found->block_groups[i]);
        init_rwsem(&found->groups_sem);
-       init_waitqueue_head(&found->flush_wait);
-       init_waitqueue_head(&found->allocate_wait);
        spin_lock_init(&found->lock);
-       found->flags = flags;
+       found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
+                               BTRFS_BLOCK_GROUP_SYSTEM |
+                               BTRFS_BLOCK_GROUP_METADATA);
        found->total_bytes = total_bytes;
        found->bytes_used = bytes_used;
+       found->disk_used = bytes_used * factor;
        found->bytes_pinned = 0;
        found->bytes_reserved = 0;
        found->bytes_readonly = 0;
-       found->bytes_delalloc = 0;
+       found->bytes_may_use = 0;
        found->full = 0;
        found->force_alloc = 0;
        *space_info = found;
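
The factor of 2 reflects on-disk redundancy: under DUP, RAID1 or RAID10 every logical byte is stored twice, so disk_used advances at twice the rate of bytes_used. Filling 1 GiB of a RAID1 data block group, for example, raises bytes_used by 1 GiB but disk_used by 2 GiB.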
@@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
        }
 }
 
-static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
-{
-       spin_lock(&cache->space_info->lock);
-       spin_lock(&cache->lock);
-       if (!cache->ro) {
-               cache->space_info->bytes_readonly += cache->key.offset -
-                                       btrfs_block_group_used(&cache->item);
-               cache->ro = 1;
-       }
-       spin_unlock(&cache->lock);
-       spin_unlock(&cache->space_info->lock);
-}
-
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
        u64 num_devices = root->fs_info->fs_devices->rw_devices;
@@ -2752,722 +2840,946 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
-{
-       struct btrfs_fs_info *info = root->fs_info;
-       u64 alloc_profile;
-
-       if (data) {
-               alloc_profile = info->avail_data_alloc_bits &
-                       info->data_alloc_profile;
-               data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
-       } else if (root == root->fs_info->chunk_root) {
-               alloc_profile = info->avail_system_alloc_bits &
-                       info->system_alloc_profile;
-               data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
-       } else {
-               alloc_profile = info->avail_metadata_alloc_bits &
-                       info->metadata_alloc_profile;
-               data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
-       }
-
-       return btrfs_reduce_alloc_profile(root, data);
-}
-
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-       u64 alloc_target;
-
-       alloc_target = btrfs_get_alloc_profile(root, 1);
-       BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
-                                                      alloc_target);
+       if (flags & BTRFS_BLOCK_GROUP_DATA)
+               flags |= root->fs_info->avail_data_alloc_bits &
+                        root->fs_info->data_alloc_profile;
+       else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+               flags |= root->fs_info->avail_system_alloc_bits &
+                        root->fs_info->system_alloc_profile;
+       else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+               flags |= root->fs_info->avail_metadata_alloc_bits &
+                        root->fs_info->metadata_alloc_profile;
+       return btrfs_reduce_alloc_profile(root, flags);
 }
 
-static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 {
-       u64 num_bytes;
-       int level;
-
-       level = BTRFS_MAX_LEVEL - 2;
-       /*
-        * NOTE: these calculations are absolutely the worst possible case.
-        * This assumes that _every_ item we insert will require a new leaf, and
-        * that the tree has grown to its maximum level size.
-        */
+       u64 flags;
 
-       /*
-        * for every item we insert we could insert both an extent item and a
-        * extent ref item.  Then for every item we insert, we will need to cow
-        * both the original leaf, plus the leaf to the left and right of it.
-        *
-        * Unless we are talking about the extent root, then we just want the
-        * number of items * 2, since we just need the extent item plus its ref.
-        */
-       if (root == root->fs_info->extent_root)
-               num_bytes = num_items * 2;
+       if (data)
+               flags = BTRFS_BLOCK_GROUP_DATA;
+       else if (root == root->fs_info->chunk_root)
+               flags = BTRFS_BLOCK_GROUP_SYSTEM;
        else
-               num_bytes = (num_items + (2 * num_items)) * 3;
+               flags = BTRFS_BLOCK_GROUP_METADATA;
 
-       /*
-        * num_bytes is total number of leaves we could need times the leaf
-        * size, and then for every leaf we could end up cow'ing 2 nodes per
-        * level, down to the leaf level.
-        */
-       num_bytes = (num_bytes * root->leafsize) +
-               (num_bytes * (level * 2)) * root->nodesize;
+       return get_alloc_profile(root, flags);
+}
 
-       return num_bytes;
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+{
+       BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+                                                      BTRFS_BLOCK_GROUP_DATA);
 }
 
 /*
- * Unreserve metadata space for delalloc.  If we have less reserved credits than
- * we have extents, this function does nothing.
+ * Check the data space_info that the inode allocates from to make sure
+ * there is enough free space for the requested number of bytes.
  */
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
-                                         struct inode *inode, int num_items)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 {
-       struct btrfs_fs_info *info = root->fs_info;
-       struct btrfs_space_info *meta_sinfo;
-       u64 num_bytes;
-       u64 alloc_target;
-       bool bug = false;
+       struct btrfs_space_info *data_sinfo;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 used;
+       int ret = 0, committed = 0;
 
-       /* get the space info for where the metadata will live */
-       alloc_target = btrfs_get_alloc_profile(root, 0);
-       meta_sinfo = __find_space_info(info, alloc_target);
+       /* make sure bytes are sectorsize aligned */
+       bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-       num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
-                                          num_items);
+       data_sinfo = BTRFS_I(inode)->space_info;
+       if (!data_sinfo)
+               goto alloc;
 
-       spin_lock(&meta_sinfo->lock);
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       if (BTRFS_I(inode)->reserved_extents <=
-           BTRFS_I(inode)->outstanding_extents) {
-               spin_unlock(&BTRFS_I(inode)->accounting_lock);
-               spin_unlock(&meta_sinfo->lock);
-               return 0;
-       }
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+again:
+       /* make sure we have enough space to handle the data first */
+       spin_lock(&data_sinfo->lock);
+       used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
+               data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
+               data_sinfo->bytes_may_use;
+
+       if (used + bytes > data_sinfo->total_bytes) {
+               struct btrfs_trans_handle *trans;
 
-       BTRFS_I(inode)->reserved_extents -= num_items;
-       BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
+               /*
+                * if we don't have enough free bytes in this space then we need
+                * to alloc a new chunk.
+                */
+               if (!data_sinfo->full) {
+                       u64 alloc_target;
 
-       if (meta_sinfo->bytes_delalloc < num_bytes) {
-               bug = true;
-               meta_sinfo->bytes_delalloc = 0;
-       } else {
-               meta_sinfo->bytes_delalloc -= num_bytes;
-       }
-       spin_unlock(&meta_sinfo->lock);
+                       data_sinfo->force_alloc = 1;
+                       spin_unlock(&data_sinfo->lock);
+alloc:
+                       alloc_target = btrfs_get_alloc_profile(root, 1);
+                       trans = btrfs_join_transaction(root, 1);
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
 
-       BUG_ON(bug);
+                       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+                                            bytes + 2 * 1024 * 1024,
+                                            alloc_target, 0);
+                       btrfs_end_transaction(trans, root);
+                       if (ret < 0)
+                               return ret;
 
-       return 0;
-}
+                       if (!data_sinfo) {
+                               btrfs_set_inode_space_info(root, inode);
+                               data_sinfo = BTRFS_I(inode)->space_info;
+                       }
+                       goto again;
+               }
+               spin_unlock(&data_sinfo->lock);
 
-static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
-{
-       u64 thresh;
+               /* commit the current transaction and try again */
+               if (!committed && !root->fs_info->open_ioctl_trans) {
+                       committed = 1;
+                       trans = btrfs_join_transaction(root, 1);
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret)
+                               return ret;
+                       goto again;
+               }
 
-       thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-               meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-               meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-               meta_sinfo->bytes_may_use;
+#if 0 /* I hope we never need this code again, just in case */
+               printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
+                      "%llu bytes_reserved, %llu bytes_pinned, "
+                      "%llu bytes_readonly, %llu bytes_may_use, %llu total\n",
+                      (unsigned long long)bytes,
+                      (unsigned long long)data_sinfo->bytes_used,
+                      (unsigned long long)data_sinfo->bytes_reserved,
+                      (unsigned long long)data_sinfo->bytes_pinned,
+                      (unsigned long long)data_sinfo->bytes_readonly,
+                      (unsigned long long)data_sinfo->bytes_may_use,
+                      (unsigned long long)data_sinfo->total_bytes);
+#endif
+               return -ENOSPC;
+       }
+       data_sinfo->bytes_may_use += bytes;
+       BTRFS_I(inode)->reserved_bytes += bytes;
+       spin_unlock(&data_sinfo->lock);
 
-       thresh = meta_sinfo->total_bytes - thresh;
-       thresh *= 80;
-       do_div(thresh, 100);
-       if (thresh <= meta_sinfo->bytes_delalloc)
-               meta_sinfo->force_delalloc = 1;
-       else
-               meta_sinfo->force_delalloc = 0;
+       return 0;
 }
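
Both helpers round the byte count up to a sector boundary, (bytes + sectorsize - 1) & ~(sectorsize - 1), so callers may pass unaligned lengths. A minimal sketch of the expected reserve/undo pairing in a write path; copy_step() is hypothetical:

    static int example_data_write(struct inode *inode, u64 bytes)
    {
            int ret;

            ret = btrfs_check_data_free_space(inode, bytes);
            if (ret)        /* -ENOSPC even after chunk alloc + commit retry */
                    return ret;

            ret = copy_step(inode, bytes);  /* hypothetical: dirty the pages */
            if (ret)        /* undo the bytes_may_use accounting on failure */
                    btrfs_free_reserved_data_space(inode, bytes);
            return ret;
    }
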
 
-struct async_flush {
-       struct btrfs_root *root;
-       struct btrfs_space_info *info;
-       struct btrfs_work work;
-};
-
-static noinline void flush_delalloc_async(struct btrfs_work *work)
+/*
+ * called when we are clearing a delalloc extent from the inode's
+ * io_tree, or when an error occurred for whatever reason after
+ * calling btrfs_check_data_free_space
+ */
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 {
-       struct async_flush *async;
-       struct btrfs_root *root;
-       struct btrfs_space_info *info;
-
-       async = container_of(work, struct async_flush, work);
-       root = async->root;
-       info = async->info;
-
-       btrfs_start_delalloc_inodes(root, 0);
-       wake_up(&info->flush_wait);
-       btrfs_wait_ordered_extents(root, 0, 0);
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_space_info *data_sinfo;
 
-       spin_lock(&info->lock);
-       info->flushing = 0;
-       spin_unlock(&info->lock);
-       wake_up(&info->flush_wait);
+       /* make sure bytes are sectorsize aligned */
+       bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-       kfree(async);
+       data_sinfo = BTRFS_I(inode)->space_info;
+       spin_lock(&data_sinfo->lock);
+       data_sinfo->bytes_may_use -= bytes;
+       BTRFS_I(inode)->reserved_bytes -= bytes;
+       spin_unlock(&data_sinfo->lock);
 }
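
The sectorsize round-up above is the usual power-of-two alignment idiom: with a 4096-byte sectorsize, (1 + 4095) & ~4095 == 4096, while an already-aligned 8192 passes through unchanged. The mask form only works because sectorsize is a power of two.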
 
-static void wait_on_flush(struct btrfs_space_info *info)
+static void force_metadata_allocation(struct btrfs_fs_info *info)
 {
-       DEFINE_WAIT(wait);
-       u64 used;
-
-       while (1) {
-               prepare_to_wait(&info->flush_wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
-               spin_lock(&info->lock);
-               if (!info->flushing) {
-                       spin_unlock(&info->lock);
-                       break;
-               }
+       struct list_head *head = &info->space_info;
+       struct btrfs_space_info *found;
 
-               used = info->bytes_used + info->bytes_reserved +
-                       info->bytes_pinned + info->bytes_readonly +
-                       info->bytes_super + info->bytes_root +
-                       info->bytes_may_use + info->bytes_delalloc;
-               if (used < info->total_bytes) {
-                       spin_unlock(&info->lock);
-                       break;
-               }
-               spin_unlock(&info->lock);
-               schedule();
+       rcu_read_lock();
+       list_for_each_entry_rcu(found, head, list) {
+               if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+                       found->force_alloc = 1;
        }
-       finish_wait(&info->flush_wait, &wait);
+       rcu_read_unlock();
 }
 
-static void flush_delalloc(struct btrfs_root *root,
-                                struct btrfs_space_info *info)
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+                             u64 alloc_bytes)
 {
-       struct async_flush *async;
-       bool wait = false;
-
-       spin_lock(&info->lock);
-
-       if (!info->flushing)
-               info->flushing = 1;
-       else
-               wait = true;
-
-       spin_unlock(&info->lock);
-
-       if (wait) {
-               wait_on_flush(info);
-               return;
-       }
-
-       async = kzalloc(sizeof(*async), GFP_NOFS);
-       if (!async)
-               goto flush;
-
-       async->root = root;
-       async->info = info;
-       async->work.func = flush_delalloc_async;
+       u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
 
-       btrfs_queue_worker(&root->fs_info->enospc_workers,
-                          &async->work);
-       wait_on_flush(info);
-       return;
+       if (sinfo->bytes_used + sinfo->bytes_reserved +
+           alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+               return 0;
 
-flush:
-       btrfs_start_delalloc_inodes(root, 0);
-       btrfs_wait_ordered_extents(root, 0, 0);
+       if (sinfo->bytes_used + sinfo->bytes_reserved +
+           alloc_bytes < div_factor(num_bytes, 8))
+               return 0;
 
-       spin_lock(&info->lock);
-       info->flushing = 0;
-       spin_unlock(&info->lock);
-       wake_up(&info->flush_wait);
+       return 1;
 }
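
As a rough, userspace-only illustration of the two should_alloc_chunk() cut-offs (a chunk is considered only once less than 256 MiB of headroom would remain and the space is at least 80% committed), assuming btrfs's div_factor(n, f) computes n * f / 10:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t div_factor(uint64_t num, int factor)
    {
            return num * factor / 10;   /* assumed btrfs helper semantics */
    }

    static int should_alloc_chunk_demo(uint64_t total, uint64_t used,
                                       uint64_t alloc_bytes)
    {
            /* more than 256 MiB of headroom would remain: no new chunk */
            if (used + alloc_bytes + (256ULL << 20) < total)
                    return 0;
            /* still under 80% committed: no new chunk */
            if (used + alloc_bytes < div_factor(total, 8))
                    return 0;
            return 1;
    }

    int main(void)
    {
            /* 10 GiB of chunk space, 7 GiB used+reserved, 1 GiB request:
             * 8.25 GiB stays below 10 GiB, so no chunk is allocated */
            printf("%d\n", should_alloc_chunk_demo(10ULL << 30,
                                                   7ULL << 30, 1ULL << 30));
            return 0;
    }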
 
-static int maybe_allocate_chunk(struct btrfs_root *root,
-                                struct btrfs_space_info *info)
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *extent_root, u64 alloc_bytes,
+                         u64 flags, int force)
 {
-       struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
-       struct btrfs_trans_handle *trans;
-       bool wait = false;
+       struct btrfs_space_info *space_info;
+       struct btrfs_fs_info *fs_info = extent_root->fs_info;
        int ret = 0;
-       u64 min_metadata;
-       u64 free_space;
 
-       free_space = btrfs_super_total_bytes(disk_super);
-       /*
-        * we allow the metadata to grow to a max of either 10gb or 5% of the
-        * space in the volume.
-        */
-       min_metadata = min((u64)10 * 1024 * 1024 * 1024,
-                            div64_u64(free_space * 5, 100));
-       if (info->total_bytes >= min_metadata) {
-               spin_unlock(&info->lock);
-               return 0;
-       }
+       mutex_lock(&fs_info->chunk_mutex);
 
-       if (info->full) {
-               spin_unlock(&info->lock);
-               return 0;
+       flags = btrfs_reduce_alloc_profile(extent_root, flags);
+
+       space_info = __find_space_info(extent_root->fs_info, flags);
+       if (!space_info) {
+               ret = update_space_info(extent_root->fs_info, flags,
+                                       0, 0, &space_info);
+               BUG_ON(ret);
        }
+       BUG_ON(!space_info);
 
-       if (!info->allocating_chunk) {
-               info->force_alloc = 1;
-               info->allocating_chunk = 1;
-       } else {
-               wait = true;
+       spin_lock(&space_info->lock);
+       if (space_info->force_alloc)
+               force = 1;
+       if (space_info->full) {
+               spin_unlock(&space_info->lock);
+               goto out;
        }
 
-       spin_unlock(&info->lock);
-
-       if (wait) {
-               wait_event(info->allocate_wait,
-                          !info->allocating_chunk);
-               return 1;
+       if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+               spin_unlock(&space_info->lock);
+               goto out;
        }
+       spin_unlock(&space_info->lock);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
-               goto out;
+       /*
+        * if we're doing a data chunk, go ahead and make sure that
+        * we keep a reasonable number of metadata chunks allocated in the
+        * FS as well.
+        */
+       if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
+               fs_info->data_chunk_allocations++;
+               if (!(fs_info->data_chunk_allocations %
+                     fs_info->metadata_ratio))
+                       force_metadata_allocation(fs_info);
        }
 
-       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                            4096 + 2 * 1024 * 1024,
-                            info->flags, 0);
-       btrfs_end_transaction(trans, root);
+       ret = btrfs_alloc_chunk(trans, extent_root, flags);
+       spin_lock(&space_info->lock);
        if (ret)
-               goto out;
+               space_info->full = 1;
+       else
+               ret = 1;
+       space_info->force_alloc = 0;
+       spin_unlock(&space_info->lock);
 out:
-       spin_lock(&info->lock);
-       info->allocating_chunk = 0;
-       spin_unlock(&info->lock);
-       wake_up(&info->allocate_wait);
+       mutex_unlock(&extent_root->fs_info->chunk_mutex);
+       return ret;
+}
 
-       if (ret)
+static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct btrfs_space_info *sinfo, u64 num_bytes)
+{
+       int ret;
+       int end_trans = 0;
+
+       if (sinfo->full)
                return 0;
-       return 1;
+
+       spin_lock(&sinfo->lock);
+       ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
+       spin_unlock(&sinfo->lock);
+       if (!ret)
+               return 0;
+
+       if (!trans) {
+               trans = btrfs_join_transaction(root, 1);
+               BUG_ON(IS_ERR(trans));
+               end_trans = 1;
+       }
+
+       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+                            num_bytes + 2 * 1024 * 1024,
+                            get_alloc_profile(root, sinfo->flags), 0);
+
+       if (end_trans)
+               btrfs_end_transaction(trans, root);
+
+       return ret == 1 ? 1 : 0;
 }
 
 /*
- * Reserve metadata space for delalloc.
+ * shrink metadata reservation for delalloc
  */
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
-                                       struct inode *inode, int num_items)
+static int shrink_delalloc(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, u64 to_reclaim)
+{
+       struct btrfs_block_rsv *block_rsv;
+       u64 reserved;
+       u64 max_reclaim;
+       u64 reclaimed = 0;
+       int pause = 1;
+       int ret;
+
+       block_rsv = &root->fs_info->delalloc_block_rsv;
+       spin_lock(&block_rsv->lock);
+       reserved = block_rsv->reserved;
+       spin_unlock(&block_rsv->lock);
+
+       if (reserved == 0)
+               return 0;
+
+       max_reclaim = min(reserved, to_reclaim);
+
+       while (1) {
+               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
+               if (!ret) {
+                       __set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(pause);
+                       pause <<= 1;
+                       if (pause > HZ / 10)
+                               pause = HZ / 10;
+               } else {
+                       pause = 1;
+               }
+
+               spin_lock(&block_rsv->lock);
+               if (reserved > block_rsv->reserved)
+                       reclaimed = reserved - block_rsv->reserved;
+               reserved = block_rsv->reserved;
+               spin_unlock(&block_rsv->lock);
+
+               if (reserved == 0 || reclaimed >= max_reclaim)
+                       break;
+
+               if (trans && trans->transaction->blocked)
+                       return -EAGAIN;
+       }
+       return reclaimed >= to_reclaim;
+}
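
The pause/HZ bookkeeping above is a clamped exponential backoff: each time btrfs_start_one_delalloc_inode() finds nothing to flush, the sleep doubles through 1, 2, 4, ... jiffies but is capped at HZ / 10, so the loop re-checks the reservation at least ten times per second while waiting for writeback to drain.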
+
+static int should_retry_reserve(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct btrfs_block_rsv *block_rsv,
+                               u64 num_bytes, int *retries)
 {
-       struct btrfs_fs_info *info = root->fs_info;
-       struct btrfs_space_info *meta_sinfo;
-       u64 num_bytes;
-       u64 used;
-       u64 alloc_target;
-       int flushed = 0;
-       int force_delalloc;
+       struct btrfs_space_info *space_info = block_rsv->space_info;
+       int ret;
 
-       /* get the space info for where the metadata will live */
-       alloc_target = btrfs_get_alloc_profile(root, 0);
-       meta_sinfo = __find_space_info(info, alloc_target);
+       if ((*retries) > 2)
+               return -ENOSPC;
 
-       num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
-                                          num_items);
-again:
-       spin_lock(&meta_sinfo->lock);
+       ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
+       if (ret)
+               return 1;
 
-       force_delalloc = meta_sinfo->force_delalloc;
+       if (trans && trans->transaction->in_commit)
+               return -ENOSPC;
 
-       if (unlikely(!meta_sinfo->bytes_root))
-               meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+       ret = shrink_delalloc(trans, root, num_bytes);
+       if (ret)
+               return ret;
 
-       if (!flushed)
-               meta_sinfo->bytes_delalloc += num_bytes;
+       spin_lock(&space_info->lock);
+       if (space_info->bytes_pinned < num_bytes)
+               ret = 1;
+       spin_unlock(&space_info->lock);
+       if (ret)
+               return -ENOSPC;
 
-       used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-               meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-               meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-               meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+       (*retries)++;
 
-       if (used > meta_sinfo->total_bytes) {
-               flushed++;
+       if (trans)
+               return -EAGAIN;
 
-               if (flushed == 1) {
-                       if (maybe_allocate_chunk(root, meta_sinfo))
-                               goto again;
-                       flushed++;
+       trans = btrfs_join_transaction(root, 1);
+       BUG_ON(IS_ERR(trans));
+       ret = btrfs_commit_transaction(trans, root);
+       BUG_ON(ret);
+
+       return 1;
+}
+
+static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
+                                 u64 num_bytes)
+{
+       struct btrfs_space_info *space_info = block_rsv->space_info;
+       u64 unused;
+       int ret = -ENOSPC;
+
+       spin_lock(&space_info->lock);
+       unused = space_info->bytes_used + space_info->bytes_reserved +
+                space_info->bytes_pinned + space_info->bytes_readonly;
+
+       if (unused < space_info->total_bytes)
+               unused = space_info->total_bytes - unused;
+       else
+               unused = 0;
+
+       if (unused >= num_bytes) {
+               if (block_rsv->priority >= 10) {
+                       space_info->bytes_reserved += num_bytes;
+                       ret = 0;
                } else {
-                       spin_unlock(&meta_sinfo->lock);
+                       if ((unused + block_rsv->reserved) *
+                           block_rsv->priority >=
+                           (num_bytes + block_rsv->reserved) * 10) {
+                               space_info->bytes_reserved += num_bytes;
+                               ret = 0;
+                       }
                }
+       }
+       spin_unlock(&space_info->lock);
 
-               if (flushed == 2) {
-                       filemap_flush(inode->i_mapping);
-                       goto again;
-               } else if (flushed == 3) {
-                       flush_delalloc(root, meta_sinfo);
-                       goto again;
+       return ret;
+}
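
Read as a worked equation, the low-priority branch above admits a reservation when (unused + reserved) * priority >= (num_bytes + reserved) * 10. With reserved == 0 that reduces to num_bytes <= unused * priority / 10, so a default rsv (priority 6, set in btrfs_init_block_rsv() below) can claim at most 60% of the free metadata space, while priority >= 10 rsvs take the unscaled fast path.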
+
+static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
+                                            struct btrfs_root *root)
+{
+       struct btrfs_block_rsv *block_rsv;
+       if (root->ref_cows)
+               block_rsv = trans->block_rsv;
+       else
+               block_rsv = root->block_rsv;
+
+       if (!block_rsv)
+               block_rsv = &root->fs_info->empty_block_rsv;
+
+       return block_rsv;
+}
+
+static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
+                              u64 num_bytes)
+{
+       int ret = -ENOSPC;
+       spin_lock(&block_rsv->lock);
+       if (block_rsv->reserved >= num_bytes) {
+               block_rsv->reserved -= num_bytes;
+               if (block_rsv->reserved < block_rsv->size)
+                       block_rsv->full = 0;
+               ret = 0;
+       }
+       spin_unlock(&block_rsv->lock);
+       return ret;
+}
+
+static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
+                               u64 num_bytes, int update_size)
+{
+       spin_lock(&block_rsv->lock);
+       block_rsv->reserved += num_bytes;
+       if (update_size)
+               block_rsv->size += num_bytes;
+       else if (block_rsv->reserved >= block_rsv->size)
+               block_rsv->full = 1;
+       spin_unlock(&block_rsv->lock);
+}
+
+void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
+                            struct btrfs_block_rsv *dest, u64 num_bytes)
+{
+       struct btrfs_space_info *space_info = block_rsv->space_info;
+
+       spin_lock(&block_rsv->lock);
+       if (num_bytes == (u64)-1)
+               num_bytes = block_rsv->size;
+       block_rsv->size -= num_bytes;
+       if (block_rsv->reserved >= block_rsv->size) {
+               num_bytes = block_rsv->reserved - block_rsv->size;
+               block_rsv->reserved = block_rsv->size;
+               block_rsv->full = 1;
+       } else {
+               num_bytes = 0;
+       }
+       spin_unlock(&block_rsv->lock);
+
+       if (num_bytes > 0) {
+               if (dest) {
+                       block_rsv_add_bytes(dest, num_bytes, 0);
+               } else {
+                       spin_lock(&space_info->lock);
+                       space_info->bytes_reserved -= num_bytes;
+                       spin_unlock(&space_info->lock);
                }
-               spin_lock(&meta_sinfo->lock);
-               meta_sinfo->bytes_delalloc -= num_bytes;
-               spin_unlock(&meta_sinfo->lock);
-               printk(KERN_ERR "enospc, has %d, reserved %d\n",
-                      BTRFS_I(inode)->outstanding_extents,
-                      BTRFS_I(inode)->reserved_extents);
-               dump_space_info(meta_sinfo, 0, 0);
-               return -ENOSPC;
        }
+}
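
A worked example of the release path above: shrinking an rsv with size 100 and reserved 120 by num_bytes == 30 leaves size 70; reserved (120) now exceeds the new size, so the excess 50 is trimmed off, the rsv is marked full, and those 50 bytes are either added to dest or returned directly to space_info->bytes_reserved.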
 
-       BTRFS_I(inode)->reserved_extents += num_items;
-       check_force_delalloc(meta_sinfo);
-       spin_unlock(&meta_sinfo->lock);
+static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
+                                  struct btrfs_block_rsv *dst, u64 num_bytes)
+{
+       int ret;
 
-       if (!flushed && force_delalloc)
-               filemap_flush(inode->i_mapping);
+       ret = block_rsv_use_bytes(src, num_bytes);
+       if (ret)
+               return ret;
 
+       block_rsv_add_bytes(dst, num_bytes, 1);
        return 0;
 }
 
-/*
- * unreserve num_items number of items worth of metadata space.  This needs to
- * be paired with btrfs_reserve_metadata_space.
- *
- * NOTE: if you have the option, run this _AFTER_ you do a
- * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
- * oprations which will result in more used metadata, so we want to make sure we
- * can do that without issue.
- */
-int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
 {
-       struct btrfs_fs_info *info = root->fs_info;
-       struct btrfs_space_info *meta_sinfo;
-       u64 num_bytes;
+       memset(rsv, 0, sizeof(*rsv));
+       spin_lock_init(&rsv->lock);
+       atomic_set(&rsv->usage, 1);
+       rsv->priority = 6;
+       INIT_LIST_HEAD(&rsv->list);
+}
+
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
+{
+       struct btrfs_block_rsv *block_rsv;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 alloc_target;
-       bool bug = false;
 
-       /* get the space info for where the metadata will live */
-       alloc_target = btrfs_get_alloc_profile(root, 0);
-       meta_sinfo = __find_space_info(info, alloc_target);
+       block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
+       if (!block_rsv)
+               return NULL;
 
-       num_bytes = calculate_bytes_needed(root, num_items);
+       btrfs_init_block_rsv(block_rsv);
 
-       spin_lock(&meta_sinfo->lock);
-       if (meta_sinfo->bytes_may_use < num_bytes) {
-               bug = true;
-               meta_sinfo->bytes_may_use = 0;
-       } else {
-               meta_sinfo->bytes_may_use -= num_bytes;
-       }
-       spin_unlock(&meta_sinfo->lock);
+       alloc_target = btrfs_get_alloc_profile(root, 0);
+       block_rsv->space_info = __find_space_info(fs_info,
+                                                 BTRFS_BLOCK_GROUP_METADATA);
 
-       BUG_ON(bug);
+       return block_rsv;
+}
 
-       return 0;
+void btrfs_free_block_rsv(struct btrfs_root *root,
+                         struct btrfs_block_rsv *rsv)
+{
+       if (rsv && atomic_dec_and_test(&rsv->usage)) {
+               btrfs_block_rsv_release(root, rsv, (u64)-1);
+               if (!rsv->durable)
+                       kfree(rsv);
+       }
 }
 
 /*
- * Reserve some metadata space for use.  We'll calculate the worste case number
- * of bytes that would be needed to modify num_items number of items.  If we
- * have space, fantastic, if not, you get -ENOSPC.  Please call
- * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
- * items you reserved, since whatever metadata you needed should have already
- * been allocated.
- *
- * This will commit the transaction to make more space if we don't have enough
- * metadata space.  THe only time we don't do this is if we're reserving space
- * inside of a transaction, then we will just return -ENOSPC and it is the
- * callers responsibility to handle it properly.
+ * make the block_rsv struct able to capture freed space.
+ * the captured space is re-added to the block_rsv struct
+ * after transaction commit
  */
-int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_rsv *block_rsv)
 {
-       struct btrfs_fs_info *info = root->fs_info;
-       struct btrfs_space_info *meta_sinfo;
-       u64 num_bytes;
-       u64 used;
-       u64 alloc_target;
-       int retries = 0;
+       block_rsv->durable = 1;
+       mutex_lock(&fs_info->durable_block_rsv_mutex);
+       list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
+       mutex_unlock(&fs_info->durable_block_rsv_mutex);
+}
 
-       /* get the space info for where the metadata will live */
-       alloc_target = btrfs_get_alloc_profile(root, 0);
-       meta_sinfo = __find_space_info(info, alloc_target);
+int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv,
+                       u64 num_bytes, int *retries)
+{
+       int ret;
 
-       num_bytes = calculate_bytes_needed(root, num_items);
+       if (num_bytes == 0)
+               return 0;
 again:
-       spin_lock(&meta_sinfo->lock);
+       ret = reserve_metadata_bytes(block_rsv, num_bytes);
+       if (!ret) {
+               block_rsv_add_bytes(block_rsv, num_bytes, 1);
+               return 0;
+       }
 
-       if (unlikely(!meta_sinfo->bytes_root))
-               meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
+       ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
+       if (ret > 0)
+               goto again;
+
+       return ret;
+}
 
-       if (!retries)
-               meta_sinfo->bytes_may_use += num_bytes;
+int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_block_rsv *block_rsv,
+                         u64 min_reserved, int min_factor)
+{
+       u64 num_bytes = 0;
+       int commit_trans = 0;
+       int ret = -ENOSPC;
 
-       used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-               meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-               meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-               meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
+       if (!block_rsv)
+               return 0;
 
-       if (used > meta_sinfo->total_bytes) {
-               retries++;
-               if (retries == 1) {
-                       if (maybe_allocate_chunk(root, meta_sinfo))
-                               goto again;
-                       retries++;
-               } else {
-                       spin_unlock(&meta_sinfo->lock);
-               }
+       spin_lock(&block_rsv->lock);
+       if (min_factor > 0)
+               num_bytes = div_factor(block_rsv->size, min_factor);
+       if (min_reserved > num_bytes)
+               num_bytes = min_reserved;
 
-               if (retries == 2) {
-                       flush_delalloc(root, meta_sinfo);
-                       goto again;
+       if (block_rsv->reserved >= num_bytes) {
+               ret = 0;
+       } else {
+               num_bytes -= block_rsv->reserved;
+               if (block_rsv->durable &&
+                   block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
+                       commit_trans = 1;
+       }
+       spin_unlock(&block_rsv->lock);
+       if (!ret)
+               return 0;
+
+       if (block_rsv->refill_used) {
+               ret = reserve_metadata_bytes(block_rsv, num_bytes);
+               if (!ret) {
+                       block_rsv_add_bytes(block_rsv, num_bytes, 0);
+                       return 0;
                }
-               spin_lock(&meta_sinfo->lock);
-               meta_sinfo->bytes_may_use -= num_bytes;
-               spin_unlock(&meta_sinfo->lock);
+       }
 
-               dump_space_info(meta_sinfo, 0, 0);
-               return -ENOSPC;
+       if (commit_trans) {
+               if (trans)
+                       return -EAGAIN;
+
+               trans = btrfs_join_transaction(root, 1);
+               BUG_ON(IS_ERR(trans));
+               ret = btrfs_commit_transaction(trans, root);
+               return 0;
        }
 
-       check_force_delalloc(meta_sinfo);
-       spin_unlock(&meta_sinfo->lock);
+       WARN_ON(1);
+       printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
+               block_rsv->size, block_rsv->reserved,
+               block_rsv->freed[0], block_rsv->freed[1]);
 
-       return 0;
+       return -ENOSPC;
+}
+
+int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
+                           struct btrfs_block_rsv *dst_rsv,
+                           u64 num_bytes)
+{
+       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+}
+
+void btrfs_block_rsv_release(struct btrfs_root *root,
+                            struct btrfs_block_rsv *block_rsv,
+                            u64 num_bytes)
+{
+       struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
+       if (global_rsv->full || global_rsv == block_rsv ||
+           block_rsv->space_info != global_rsv->space_info)
+               global_rsv = NULL;
+       block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
 }
 
 /*
- * This will check the space that the inode allocates from to make sure we have
- * enough space for bytes.
+ * helper to calculate the size of the global block reservation.
+ * the desired value is the sum of the space used by the extent tree,
+ * the checksum tree and the root tree
  */
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
-                               u64 bytes)
+static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_space_info *data_sinfo;
-       u64 used;
-       int ret = 0, committed = 0, flushed = 0;
+       struct btrfs_space_info *sinfo;
+       u64 num_bytes;
+       u64 meta_used;
+       u64 data_used;
+       int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
+#if 0
+       /*
+        * per-tree used space accounting can be inaccurate, so we
+        * can't rely on it.
+        */
+       spin_lock(&fs_info->extent_root->accounting_lock);
+       num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
+       spin_unlock(&fs_info->extent_root->accounting_lock);
 
-       /* make sure bytes are sectorsize aligned */
-       bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+       spin_lock(&fs_info->csum_root->accounting_lock);
+       num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
+       spin_unlock(&fs_info->csum_root->accounting_lock);
 
-       data_sinfo = BTRFS_I(inode)->space_info;
-       if (!data_sinfo)
-               goto alloc;
+       spin_lock(&fs_info->tree_root->accounting_lock);
+       num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
+       spin_unlock(&fs_info->tree_root->accounting_lock);
+#endif
+       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
+       spin_lock(&sinfo->lock);
+       data_used = sinfo->bytes_used;
+       spin_unlock(&sinfo->lock);
 
-again:
-       /* make sure we have enough space to handle the data first */
-       spin_lock(&data_sinfo->lock);
-       used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
-               data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
-               data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
-               data_sinfo->bytes_super;
+       sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+       spin_lock(&sinfo->lock);
+       meta_used = sinfo->bytes_used;
+       spin_unlock(&sinfo->lock);
 
-       if (used + bytes > data_sinfo->total_bytes) {
-               struct btrfs_trans_handle *trans;
+       num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
+                   csum_size * 2;
+       num_bytes += div64_u64(data_used + meta_used, 50);
 
-               if (!flushed) {
-                       spin_unlock(&data_sinfo->lock);
-                       flush_delalloc(root, data_sinfo);
-                       flushed = 1;
-                       goto again;
-               }
+       if (num_bytes * 3 > meta_used)
+               num_bytes = div64_u64(meta_used, 3);
 
-               /*
-                * if we don't have enough free bytes in this space then we need
-                * to alloc a new chunk.
-                */
-               if (!data_sinfo->full) {
-                       u64 alloc_target;
+       return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
+}
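
To make the sizing concrete, here is a standalone replay of the math for a hypothetical filesystem: 100 GiB of data, 2 GiB of metadata, 4 KiB blocks (s_blocksize_bits == 12), crc32c's 4-byte checksums, and 4 KiB leaves (so the ALIGN granularity is leafsize << 10 == 4 MiB). All inputs are illustrative assumptions, not values from the patch:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t data_used = 100ULL << 30;   /* 100 GiB of data */
            uint64_t meta_used = 2ULL << 30;     /* 2 GiB of metadata */
            uint64_t csum_size = 4;              /* crc32c */
            uint64_t align = 4096ULL << 10;      /* leafsize << 10 */
            uint64_t num_bytes;

            /* checksum bytes for each 4 KiB data block, times two */
            num_bytes = (data_used >> 12) * csum_size * 2;
            /* plus 2% of everything that is in use */
            num_bytes += (data_used + meta_used) / 50;
            /* capped at a third of the metadata actually in use */
            if (num_bytes * 3 > meta_used)
                    num_bytes = meta_used / 3;
            /* ALIGN(num_bytes, align) */
            num_bytes = (num_bytes + align - 1) & ~(align - 1);

            printf("global rsv target: %llu MiB\n",
                   (unsigned long long)(num_bytes >> 20));   /* 684 MiB */
            return 0;
    }

Here the one-third-of-metadata cap wins, so the target settles at 684 MiB after alignment.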
 
-                       data_sinfo->force_alloc = 1;
-                       spin_unlock(&data_sinfo->lock);
-alloc:
-                       alloc_target = btrfs_get_alloc_profile(root, 1);
-                       trans = btrfs_start_transaction(root, 1);
-                       if (!trans)
-                               return -ENOMEM;
+static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
+       struct btrfs_space_info *sinfo = block_rsv->space_info;
+       u64 num_bytes;
 
-                       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                            bytes + 2 * 1024 * 1024,
-                                            alloc_target, 0);
-                       btrfs_end_transaction(trans, root);
-                       if (ret)
-                               return ret;
+       num_bytes = calc_global_metadata_size(fs_info);
 
-                       if (!data_sinfo) {
-                               btrfs_set_inode_space_info(root, inode);
-                               data_sinfo = BTRFS_I(inode)->space_info;
-                       }
-                       goto again;
-               }
-               spin_unlock(&data_sinfo->lock);
+       spin_lock(&block_rsv->lock);
+       spin_lock(&sinfo->lock);
 
-               /* commit the current transaction and try again */
-               if (!committed && !root->fs_info->open_ioctl_trans) {
-                       committed = 1;
-                       trans = btrfs_join_transaction(root, 1);
-                       if (!trans)
-                               return -ENOMEM;
-                       ret = btrfs_commit_transaction(trans, root);
-                       if (ret)
-                               return ret;
-                       goto again;
-               }
+       block_rsv->size = num_bytes;
 
-               printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
-                      ", %llu bytes_used, %llu bytes_reserved, "
-                      "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
-                      "%llu total\n", (unsigned long long)bytes,
-                      (unsigned long long)data_sinfo->bytes_delalloc,
-                      (unsigned long long)data_sinfo->bytes_used,
-                      (unsigned long long)data_sinfo->bytes_reserved,
-                      (unsigned long long)data_sinfo->bytes_pinned,
-                      (unsigned long long)data_sinfo->bytes_readonly,
-                      (unsigned long long)data_sinfo->bytes_may_use,
-                      (unsigned long long)data_sinfo->total_bytes);
-               return -ENOSPC;
+       num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
+                   sinfo->bytes_reserved + sinfo->bytes_readonly;
+
+       if (sinfo->total_bytes > num_bytes) {
+               num_bytes = sinfo->total_bytes - num_bytes;
+               block_rsv->reserved += num_bytes;
+               sinfo->bytes_reserved += num_bytes;
        }
-       data_sinfo->bytes_may_use += bytes;
-       BTRFS_I(inode)->reserved_bytes += bytes;
-       spin_unlock(&data_sinfo->lock);
 
-       return 0;
+       if (block_rsv->reserved >= block_rsv->size) {
+               num_bytes = block_rsv->reserved - block_rsv->size;
+               sinfo->bytes_reserved -= num_bytes;
+               block_rsv->reserved = block_rsv->size;
+               block_rsv->full = 1;
+       }
+#if 0
+       printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
+               block_rsv->size, block_rsv->reserved);
+#endif
+       spin_unlock(&sinfo->lock);
+       spin_unlock(&block_rsv->lock);
 }
 
-/*
- * if there was an error for whatever reason after calling
- * btrfs_check_data_free_space, call this so we can cleanup the counters.
- */
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
-                                   struct inode *inode, u64 bytes)
+static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_space_info *data_sinfo;
+       struct btrfs_space_info *space_info;
 
-       /* make sure bytes are sectorsize aligned */
-       bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+       space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+       fs_info->chunk_block_rsv.space_info = space_info;
+       fs_info->chunk_block_rsv.priority = 10;
 
-       data_sinfo = BTRFS_I(inode)->space_info;
-       spin_lock(&data_sinfo->lock);
-       data_sinfo->bytes_may_use -= bytes;
-       BTRFS_I(inode)->reserved_bytes -= bytes;
-       spin_unlock(&data_sinfo->lock);
+       space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+       fs_info->global_block_rsv.space_info = space_info;
+       fs_info->global_block_rsv.priority = 10;
+       fs_info->global_block_rsv.refill_used = 1;
+       fs_info->delalloc_block_rsv.space_info = space_info;
+       fs_info->trans_block_rsv.space_info = space_info;
+       fs_info->empty_block_rsv.space_info = space_info;
+       fs_info->empty_block_rsv.priority = 10;
+
+       fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
+       fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
+       fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
+       fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+       fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
+
+       btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
+
+       btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
+
+       update_global_block_rsv(fs_info);
 }
 
-/* called when we are adding a delalloc extent to the inode's io_tree */
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
-                                 u64 bytes)
+static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
-       struct btrfs_space_info *data_sinfo;
+       block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
+       WARN_ON(fs_info->delalloc_block_rsv.size > 0);
+       WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
+       WARN_ON(fs_info->trans_block_rsv.size > 0);
+       WARN_ON(fs_info->trans_block_rsv.reserved > 0);
+       WARN_ON(fs_info->chunk_block_rsv.size > 0);
+       WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+}
 
-       /* get the space info for where this inode will be storing its data */
-       data_sinfo = BTRFS_I(inode)->space_info;
+static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
+{
+       return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
+               3 * num_items;
+}
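
With the common 4 KiB leaf/node size and BTRFS_MAX_LEVEL == 8, this charges (4096 + 4096 * 7) * 3 = 98304 bytes, i.e. 96 KiB per item: one tree block for every level of a maximal-height tree, with the factor of three as worst-case slack.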
 
-       /* make sure we have enough space to handle the data first */
-       spin_lock(&data_sinfo->lock);
-       data_sinfo->bytes_delalloc += bytes;
+int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                int num_items, int *retries)
+{
+       u64 num_bytes;
+       int ret;
 
-       /*
-        * we are adding a delalloc extent without calling
-        * btrfs_check_data_free_space first.  This happens on a weird
-        * writepage condition, but shouldn't hurt our accounting
-        */
-       if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
-               data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
-               BTRFS_I(inode)->reserved_bytes = 0;
-       } else {
-               data_sinfo->bytes_may_use -= bytes;
-               BTRFS_I(inode)->reserved_bytes -= bytes;
-       }
+       if (num_items == 0 || root->fs_info->chunk_root == root)
+               return 0;
 
-       spin_unlock(&data_sinfo->lock);
+       num_bytes = calc_trans_metadata_size(root, num_items);
+       ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
+                                 num_bytes, retries);
+       if (!ret) {
+               trans->bytes_reserved += num_bytes;
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+       }
+       return ret;
 }
 
-/* called when we are clearing an delalloc extent from the inode's io_tree */
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
-                             u64 bytes)
+void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root)
 {
-       struct btrfs_space_info *info;
+       if (!trans->bytes_reserved)
+               return;
 
-       info = BTRFS_I(inode)->space_info;
+       BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
+       btrfs_block_rsv_release(root, trans->block_rsv,
+                               trans->bytes_reserved);
+       trans->bytes_reserved = 0;
+}
 
-       spin_lock(&info->lock);
-       info->bytes_delalloc -= bytes;
-       spin_unlock(&info->lock);
+int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
+                                 struct inode *inode)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
+       struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
+
+       /*
+        * one for deleting the orphan item, one for updating the inode
+        * and two for calling btrfs_truncate_inode_items.
+        *
+        * btrfs_truncate_inode_items is a delete operation, so it frees
+        * more space than it uses in most cases. Two units of metadata
+        * space should therefore be enough to call it many times.
+        * If all of the metadata space is used, we can commit the
+        * transaction and use the space it freed.
+        */
+       u64 num_bytes = calc_trans_metadata_size(root, 4);
+       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
-static void force_metadata_allocation(struct btrfs_fs_info *info)
+void btrfs_orphan_release_metadata(struct inode *inode)
 {
-       struct list_head *head = &info->space_info;
-       struct btrfs_space_info *found;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 num_bytes = calc_trans_metadata_size(root, 4);
+       btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
+}
 
-       rcu_read_lock();
-       list_for_each_entry_rcu(found, head, list) {
-               if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-                       found->force_alloc = 1;
-       }
-       rcu_read_unlock();
+int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending)
+{
+       struct btrfs_root *root = pending->root;
+       struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
+       struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
+       /*
+        * two for the root back/forward refs, two for the directory
+        * entries and one for the root of the snapshot.
+        */
+       u64 num_bytes = calc_trans_metadata_size(root, 5);
+       dst_rsv->space_info = src_rsv->space_info;
+       return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *extent_root, u64 alloc_bytes,
-                         u64 flags, int force)
+static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
-       struct btrfs_space_info *space_info;
-       struct btrfs_fs_info *fs_info = extent_root->fs_info;
-       u64 thresh;
-       int ret = 0;
+       return num_bytes >> 3;
+}
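
In other words, checksum metadata is reserved at a flat one-eighth of the data bytes: a 1 MiB delalloc range sets aside an extra 128 KiB for csum items.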
 
-       mutex_lock(&fs_info->chunk_mutex);
+int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
+       u64 to_reserve;
+       int nr_extents;
+       int retries = 0;
+       int ret;
 
-       flags = btrfs_reduce_alloc_profile(extent_root, flags);
+       if (btrfs_transaction_in_commit(root->fs_info))
+               schedule_timeout(1);
 
-       space_info = __find_space_info(extent_root->fs_info, flags);
-       if (!space_info) {
-               ret = update_space_info(extent_root->fs_info, flags,
-                                       0, 0, &space_info);
-               BUG_ON(ret);
+       num_bytes = ALIGN(num_bytes, root->sectorsize);
+again:
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
+       if (nr_extents > BTRFS_I(inode)->reserved_extents) {
+               nr_extents -= BTRFS_I(inode)->reserved_extents;
+               to_reserve = calc_trans_metadata_size(root, nr_extents);
+       } else {
+               nr_extents = 0;
+               to_reserve = 0;
        }
-       BUG_ON(!space_info);
 
-       spin_lock(&space_info->lock);
-       if (space_info->force_alloc)
-               force = 1;
-       if (space_info->full) {
-               spin_unlock(&space_info->lock);
-               goto out;
+       to_reserve += calc_csum_metadata_size(inode, num_bytes);
+       ret = reserve_metadata_bytes(block_rsv, to_reserve);
+       if (ret) {
+               spin_unlock(&BTRFS_I(inode)->accounting_lock);
+               ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
+                                          &retries);
+               if (ret > 0)
+                       goto again;
+               return ret;
        }
 
-       thresh = space_info->total_bytes - space_info->bytes_readonly;
-       thresh = div_factor(thresh, 8);
-       if (!force &&
-          (space_info->bytes_used + space_info->bytes_pinned +
-           space_info->bytes_reserved + alloc_bytes) < thresh) {
-               spin_unlock(&space_info->lock);
-               goto out;
-       }
-       spin_unlock(&space_info->lock);
+       BTRFS_I(inode)->reserved_extents += nr_extents;
+       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-       /*
-        * if we're doing a data chunk, go ahead and make sure that
-        * we keep a reasonable number of metadata chunks allocated in the
-        * FS as well.
-        */
-       if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
-               fs_info->data_chunk_allocations++;
-               if (!(fs_info->data_chunk_allocations %
-                     fs_info->metadata_ratio))
-                       force_metadata_allocation(fs_info);
+       block_rsv_add_bytes(block_rsv, to_reserve, 1);
+
+       if (block_rsv->size > 512 * 1024 * 1024)
+               shrink_delalloc(NULL, root, to_reserve);
+
+       return 0;
+}
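
Putting the pieces together with the 4 KiB-node numbers above: a 1 MiB write that adds one outstanding extent beyond what is already reserved takes to_reserve = calc_trans_metadata_size(root, 1) + (1 MiB >> 3) = 96 KiB + 128 KiB = 224 KiB from the delalloc block rsv.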
+
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 to_free;
+       int nr_extents;
+
+       num_bytes = ALIGN(num_bytes, root->sectorsize);
+       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
+       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
+       if (nr_extents < BTRFS_I(inode)->reserved_extents) {
+               nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
+               BTRFS_I(inode)->reserved_extents -= nr_extents;
+       } else {
+               nr_extents = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-       ret = btrfs_alloc_chunk(trans, extent_root, flags);
-       spin_lock(&space_info->lock);
+       to_free = calc_csum_metadata_size(inode, num_bytes);
+       if (nr_extents > 0)
+               to_free += calc_trans_metadata_size(root, nr_extents);
+
+       btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
+                               to_free);
+}
+
+int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
+{
+       int ret;
+
+       ret = btrfs_check_data_free_space(inode, num_bytes);
        if (ret)
-               space_info->full = 1;
-       space_info->force_alloc = 0;
-       spin_unlock(&space_info->lock);
-out:
-       mutex_unlock(&extent_root->fs_info->chunk_mutex);
-       return ret;
+               return ret;
+
+       ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
+       if (ret) {
+               btrfs_free_reserved_data_space(inode, num_bytes);
+               return ret;
+       }
+
+       return 0;
+}
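
btrfs_delalloc_reserve_space() and btrfs_delalloc_release_space() are intended as a strict pair around a write attempt; a minimal sketch of a caller, where do_the_write() is a hypothetical stand-in rather than anything from the patch:

    static int example_buffered_write(struct inode *inode, u64 len)
    {
            int ret;

            /* reserves data space plus the matching delalloc metadata */
            ret = btrfs_delalloc_reserve_space(inode, len);
            if (ret)
                    return ret;

            ret = do_the_write(inode, len);      /* hypothetical */
            if (ret)
                    /* failure: undo both reservations in one call */
                    btrfs_delalloc_release_space(inode, len);
            return ret;
    }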
+
+void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
+{
+       btrfs_delalloc_release_metadata(inode, num_bytes);
+       btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
-                             u64 bytenr, u64 num_bytes, int alloc,
-                             int mark_free)
+                             u64 bytenr, u64 num_bytes, int alloc)
 {
        struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *info = root->fs_info;
+       int factor;
        u64 total = num_bytes;
        u64 old_val;
        u64 byte_in_group;
@@ -3486,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                cache = btrfs_lookup_block_group(info, bytenr);
                if (!cache)
                        return -1;
+               if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
+                                   BTRFS_BLOCK_GROUP_RAID1 |
+                                   BTRFS_BLOCK_GROUP_RAID10))
+                       factor = 2;
+               else
+                       factor = 1;
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
@@ -3498,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        old_val += num_bytes;
                        btrfs_set_block_group_used(&cache->item, old_val);
                        cache->reserved -= num_bytes;
-                       cache->space_info->bytes_used += num_bytes;
                        cache->space_info->bytes_reserved -= num_bytes;
-                       if (cache->ro)
-                               cache->space_info->bytes_readonly -= num_bytes;
+                       cache->space_info->bytes_used += num_bytes;
+                       cache->space_info->disk_used += num_bytes * factor;
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
                } else {
                        old_val -= num_bytes;
-                       cache->space_info->bytes_used -= num_bytes;
-                       if (cache->ro)
-                               cache->space_info->bytes_readonly += num_bytes;
                        btrfs_set_block_group_used(&cache->item, old_val);
+                       cache->pinned += num_bytes;
+                       cache->space_info->bytes_pinned += num_bytes;
+                       cache->space_info->bytes_used -= num_bytes;
+                       cache->space_info->disk_used -= num_bytes * factor;
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
-                       if (mark_free) {
-                               int ret;
 
-                               ret = btrfs_discard_extent(root, bytenr,
-                                                          num_bytes);
-                               WARN_ON(ret);
-
-                               ret = btrfs_add_free_space(cache, bytenr,
-                                                          num_bytes);
-                               WARN_ON(ret);
-                       }
+                       set_extent_dirty(info->pinned_extents,
+                                        bytenr, bytenr + num_bytes - 1,
+                                        GFP_NOFS | __GFP_NOFAIL);
                }
                btrfs_put_block_group(cache);
                total -= num_bytes;
@@ -3546,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
        return bytenr;
 }
 
-/*
- * this function must be called within transaction
- */
-int btrfs_pin_extent(struct btrfs_root *root,
-                    u64 bytenr, u64 num_bytes, int reserved)
+static int pin_down_extent(struct btrfs_root *root,
+                          struct btrfs_block_group_cache *cache,
+                          u64 bytenr, u64 num_bytes, int reserved)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       struct btrfs_block_group_cache *cache;
-
-       cache = btrfs_lookup_block_group(fs_info, bytenr);
-       BUG_ON(!cache);
-
        spin_lock(&cache->space_info->lock);
        spin_lock(&cache->lock);
        cache->pinned += num_bytes;
@@ -3569,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
        spin_unlock(&cache->lock);
        spin_unlock(&cache->space_info->lock);
 
-       btrfs_put_block_group(cache);
+       set_extent_dirty(root->fs_info->pinned_extents, bytenr,
+                        bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+       return 0;
+}
+
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num_bytes, int reserved)
+{
+       struct btrfs_block_group_cache *cache;
 
-       set_extent_dirty(fs_info->pinned_extents,
-                        bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+       cache = btrfs_lookup_block_group(root->fs_info, bytenr);
+       BUG_ON(!cache);
+
+       pin_down_extent(root, cache, bytenr, num_bytes, reserved);
+
+       btrfs_put_block_group(cache);
        return 0;
 }
 
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
-                                  u64 num_bytes, int reserve)
+/*
+ * update the size of reserved extents. this function may return -EAGAIN
+ * for a read-only block group if 'reserve' is true or 'sinfo' is false.
+ */
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                u64 num_bytes, int reserve, int sinfo)
 {
-       spin_lock(&cache->space_info->lock);
-       spin_lock(&cache->lock);
-       if (reserve) {
-               cache->reserved += num_bytes;
-               cache->space_info->bytes_reserved += num_bytes;
+       int ret = 0;
+       if (sinfo) {
+               struct btrfs_space_info *space_info = cache->space_info;
+               spin_lock(&space_info->lock);
+               spin_lock(&cache->lock);
+               if (reserve) {
+                       if (cache->ro) {
+                               ret = -EAGAIN;
+                       } else {
+                               cache->reserved += num_bytes;
+                               space_info->bytes_reserved += num_bytes;
+                       }
+               } else {
+                       if (cache->ro)
+                               space_info->bytes_readonly += num_bytes;
+                       cache->reserved -= num_bytes;
+                       space_info->bytes_reserved -= num_bytes;
+               }
+               spin_unlock(&cache->lock);
+               spin_unlock(&space_info->lock);
        } else {
-               cache->reserved -= num_bytes;
-               cache->space_info->bytes_reserved -= num_bytes;
+               spin_lock(&cache->lock);
+               if (cache->ro) {
+                       ret = -EAGAIN;
+               } else {
+                       if (reserve)
+                               cache->reserved += num_bytes;
+                       else
+                               cache->reserved -= num_bytes;
+               }
+               spin_unlock(&cache->lock);
        }
-       spin_unlock(&cache->lock);
-       spin_unlock(&cache->space_info->lock);
-       return 0;
+       return ret;
 }
 
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
@@ -3621,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                fs_info->pinned_extents = &fs_info->freed_extents[0];
 
        up_write(&fs_info->extent_commit_sem);
+
+       update_global_block_rsv(fs_info);
        return 0;
 }
 
@@ -3647,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
                        btrfs_add_free_space(cache, start, len);
                }
 
+               start += len;
+
                spin_lock(&cache->space_info->lock);
                spin_lock(&cache->lock);
                cache->pinned -= len;
                cache->space_info->bytes_pinned -= len;
+               if (cache->ro) {
+                       cache->space_info->bytes_readonly += len;
+               } else if (cache->reserved_pinned > 0) {
+                       len = min(len, cache->reserved_pinned);
+                       cache->reserved_pinned -= len;
+                       cache->space_info->bytes_reserved += len;
+               }
                spin_unlock(&cache->lock);
                spin_unlock(&cache->space_info->lock);
-
-               start += len;
        }
 
        if (cache)
@@ -3667,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_io_tree *unpin;
+       struct btrfs_block_rsv *block_rsv;
+       struct btrfs_block_rsv *next_rsv;
        u64 start;
        u64 end;
+       int idx;
        int ret;
 
        if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -3689,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                cond_resched();
        }
 
-       return ret;
-}
-
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         struct btrfs_path *path,
-                         u64 bytenr, u64 num_bytes,
-                         int is_data, int reserved,
-                         struct extent_buffer **must_clean)
-{
-       int err = 0;
-       struct extent_buffer *buf;
-
-       if (is_data)
-               goto pinit;
-
-       /*
-        * discard is sloooow, and so triggering discards on
-        * individual btree blocks isn't a good plan.  Just
-        * pin everything in discard mode.
-        */
-       if (btrfs_test_opt(root, DISCARD))
-               goto pinit;
+       mutex_lock(&fs_info->durable_block_rsv_mutex);
+       list_for_each_entry_safe(block_rsv, next_rsv,
+                                &fs_info->durable_block_rsv_list, list) {
 
-       buf = btrfs_find_tree_block(root, bytenr, num_bytes);
-       if (!buf)
-               goto pinit;
+               idx = trans->transid & 0x1;
+               if (block_rsv->freed[idx] > 0) {
+                       block_rsv_add_bytes(block_rsv,
+                                           block_rsv->freed[idx], 0);
+                       block_rsv->freed[idx] = 0;
+               }
+               if (atomic_read(&block_rsv->usage) == 0) {
+                       btrfs_block_rsv_release(root, block_rsv, (u64)-1);
 
-       /* we can reuse a block if it hasn't been written
-        * and it is from this transaction.  We can't
-        * reuse anything from the tree log root because
-        * it has tiny sub-transactions.
-        */
-       if (btrfs_buffer_uptodate(buf, 0) &&
-           btrfs_try_tree_lock(buf)) {
-               u64 header_owner = btrfs_header_owner(buf);
-               u64 header_transid = btrfs_header_generation(buf);
-               if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
-                   header_transid == trans->transid &&
-                   !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
-                       *must_clean = buf;
-                       return 1;
+                       if (block_rsv->freed[0] == 0 &&
+                           block_rsv->freed[1] == 0) {
+                               list_del_init(&block_rsv->list);
+                               kfree(block_rsv);
+                       }
+               } else {
+                       btrfs_block_rsv_release(root, block_rsv, 0);
                }
-               btrfs_tree_unlock(buf);
        }
-       free_extent_buffer(buf);
-pinit:
-       if (path)
-               btrfs_set_path_blocking(path);
-       /* unlocks the pinned mutex */
-       btrfs_pin_extent(root, bytenr, num_bytes, reserved);
+       mutex_unlock(&fs_info->durable_block_rsv_mutex);
 
-       BUG_ON(err < 0);
        return 0;
 }
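A durable block reservation keeps two freed[] counters indexed by transaction-id parity: bytes freed while transaction N runs accumulate in freed[N & 1], and the commit path above drains only the committing transaction's slot, leaving bytes freed by an overlapping transaction N+1 untouched until its own commit. The drain step in isolation, reusing the helper named in the hunk:

static void drain_committed_slot(struct btrfs_block_rsv *rsv, u64 transid)
{
	int idx = transid & 0x1;	/* slot owned by this commit */

	if (rsv->freed[idx] > 0) {
		block_rsv_add_bytes(rsv, rsv->freed[idx], 0);
		rsv->freed[idx] = 0;
	}
}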
 
@@ -3902,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                }
        } else {
-               int mark_free = 0;
-               struct extent_buffer *must_clean = NULL;
-
                if (found_extent) {
                        BUG_ON(is_data && refs_to_drop !=
                               extent_data_ref_count(root, path, iref));
@@ -3917,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
-               ret = pin_down_bytes(trans, root, path, bytenr,
-                                    num_bytes, is_data, 0, &must_clean);
-               if (ret > 0)
-                       mark_free = 1;
-               BUG_ON(ret < 0);
-               /*
-                * it is going to be very rare for someone to be waiting
-                * on the block we're freeing.  del_items might need to
-                * schedule, so rather than get fancy, just force it
-                * to blocking here
-                */
-               if (must_clean)
-                       btrfs_set_lock_blocking(must_clean);
-
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                BUG_ON(ret);
                btrfs_release_path(extent_root, path);
 
-               if (must_clean) {
-                       clean_tree_block(NULL, root, must_clean);
-                       btrfs_tree_unlock(must_clean);
-                       free_extent_buffer(must_clean);
-               }
-
                if (is_data) {
                        ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
                        BUG_ON(ret);
@@ -3951,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                             (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
                }
 
-               ret = update_block_group(trans, root, bytenr, num_bytes, 0,
-                                        mark_free);
+               ret = update_block_group(trans, root, bytenr, num_bytes, 0);
                BUG_ON(ret);
        }
        btrfs_free_path(path);
@@ -3960,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 }
 
 /*
- * when we free an extent, it is possible (and likely) that we free the last
+ * when we free a block, it is possible (and likely) that we free the last
  * delayed ref for that extent as well.  This searches the delayed ref tree for
  * a given extent, and if there are no other delayed refs to be processed, it
  * removes it from the tree.
@@ -3972,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct rb_node *node;
-       int ret;
+       int ret = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
@@ -4024,17 +4326,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
        list_del_init(&head->cluster);
        spin_unlock(&delayed_refs->lock);
 
-       ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
-                                 &head->node, head->extent_op,
-                                 head->must_insert_reserved);
-       BUG_ON(ret);
+       BUG_ON(head->extent_op);
+       if (head->must_insert_reserved)
+               ret = 1;
+
+       mutex_unlock(&head->mutex);
        btrfs_put_delayed_ref(&head->node);
-       return 0;
+       return ret;
 out:
        spin_unlock(&delayed_refs->lock);
        return 0;
 }
 
+void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
+                          struct extent_buffer *buf,
+                          u64 parent, int last_ref)
+{
+       struct btrfs_block_rsv *block_rsv;
+       struct btrfs_block_group_cache *cache = NULL;
+       int ret;
+
+       if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+               ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
+                                               parent, root->root_key.objectid,
+                                               btrfs_header_level(buf),
+                                               BTRFS_DROP_DELAYED_REF, NULL);
+               BUG_ON(ret);
+       }
+
+       if (!last_ref)
+               return;
+
+       block_rsv = get_block_rsv(trans, root);
+       cache = btrfs_lookup_block_group(root->fs_info, buf->start);
+       BUG_ON(block_rsv->space_info != cache->space_info);
+
+       if (btrfs_header_generation(buf) == trans->transid) {
+               if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
+                       ret = check_ref_cleanup(trans, root, buf->start);
+                       if (!ret)
+                               goto pin;
+               }
+
+               if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+                       pin_down_extent(root, cache, buf->start, buf->len, 1);
+                       goto pin;
+               }
+
+               WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
+
+               btrfs_add_free_space(cache, buf->start, buf->len);
+               ret = update_reserved_bytes(cache, buf->len, 0, 0);
+               if (ret == -EAGAIN) {
+                       /* block group became read-only */
+                       update_reserved_bytes(cache, buf->len, 0, 1);
+                       goto out;
+               }
+
+               ret = 1;
+               spin_lock(&block_rsv->lock);
+               if (block_rsv->reserved < block_rsv->size) {
+                       block_rsv->reserved += buf->len;
+                       ret = 0;
+               }
+               spin_unlock(&block_rsv->lock);
+
+               if (ret) {
+                       spin_lock(&cache->space_info->lock);
+                       cache->space_info->bytes_reserved -= buf->len;
+                       spin_unlock(&cache->space_info->lock);
+               }
+               goto out;
+       }
+pin:
+       if (block_rsv->durable && !cache->ro) {
+               ret = 0;
+               spin_lock(&cache->lock);
+               if (!cache->ro) {
+                       cache->reserved_pinned += buf->len;
+                       ret = 1;
+               }
+               spin_unlock(&cache->lock);
+
+               if (ret) {
+                       spin_lock(&block_rsv->lock);
+                       block_rsv->freed[trans->transid & 0x1] += buf->len;
+                       spin_unlock(&block_rsv->lock);
+               }
+       }
+out:
+       btrfs_put_block_group(cache);
+}
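btrfs_free_tree_block() above replaces the old pin_down_bytes()/must_clean dance. Its core decision, as a sketch with check_ref_cleanup(), the reservation refill and all locking elided:

/* sketch only -- see the full function above */
static void free_tree_block_outline(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct extent_buffer *buf,
				    struct btrfs_block_group_cache *cache)
{
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
		/* allocated and freed in the same transaction and never
		 * written out: the space can be reused immediately */
		btrfs_add_free_space(cache, buf->start, buf->len);
	} else {
		/* the committed tree may still reference the block:
		 * keep it pinned until this transaction commits */
		pin_down_extent(root, cache, buf->start, buf->len, 1);
	}
}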
+
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, u64 parent,
@@ -4056,8 +4440,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                                        parent, root_objectid, (int)owner,
                                        BTRFS_DROP_DELAYED_REF, NULL);
                BUG_ON(ret);
-               ret = check_ref_cleanup(trans, root, bytenr);
-               BUG_ON(ret);
        } else {
                ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
                                        parent, root_objectid, owner,
@@ -4067,21 +4449,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root,
-                         u64 bytenr, u32 blocksize,
-                         u64 parent, u64 root_objectid, int level)
-{
-       u64 used;
-       spin_lock(&root->node_lock);
-       used = btrfs_root_used(&root->root_item) - blocksize;
-       btrfs_set_root_used(&root->root_item, used);
-       spin_unlock(&root->node_lock);
-
-       return btrfs_free_extent(trans, root, bytenr, blocksize,
-                                parent, root_objectid, level, 0);
-}
-
 static u64 stripe_align(struct btrfs_root *root, u64 val)
 {
        u64 mask = ((u64)root->stripesize - 1);
@@ -4134,6 +4501,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
        return 0;
 }
 
+static int get_block_group_index(struct btrfs_block_group_cache *cache)
+{
+       int index;
+       if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
+               index = 0;
+       else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
+               index = 1;
+       else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
+               index = 2;
+       else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
+               index = 3;
+       else
+               index = 4;
+       return index;
+}
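get_block_group_index() orders the new per-space_info block group lists from most to least redundant, and find_free_extent() below walks them in index order so mirrored groups are tried first. The mapping, spelled out (BTRFS_NR_RAID_TYPES is 5 in this series):

/*
 * index 0: BTRFS_BLOCK_GROUP_RAID10
 * index 1: BTRFS_BLOCK_GROUP_RAID1
 * index 2: BTRFS_BLOCK_GROUP_DUP
 * index 3: BTRFS_BLOCK_GROUP_RAID0
 * index 4: single (no replication flag set)
 */
struct btrfs_space_info {
	/* ... */
	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
};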
+
 enum btrfs_loop_type {
        LOOP_FIND_IDEAL = 0,
        LOOP_CACHING_NOWAIT = 1,
@@ -4155,7 +4538,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                     u64 num_bytes, u64 empty_size,
                                     u64 search_start, u64 search_end,
                                     u64 hint_byte, struct btrfs_key *ins,
-                                    u64 exclude_start, u64 exclude_nr,
                                     int data)
 {
        int ret = 0;
@@ -4168,6 +4550,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        struct btrfs_space_info *space_info;
        int last_ptr_loop = 0;
        int loop = 0;
+       int index = 0;
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
@@ -4237,6 +4620,7 @@ ideal_cache:
                                btrfs_put_block_group(block_group);
                                up_read(&space_info->groups_sem);
                        } else {
+                               index = get_block_group_index(block_group);
                                goto have_block_group;
                        }
                } else if (block_group) {
@@ -4245,7 +4629,8 @@ ideal_cache:
        }
 search:
        down_read(&space_info->groups_sem);
-       list_for_each_entry(block_group, &space_info->block_groups, list) {
+       list_for_each_entry(block_group, &space_info->block_groups[index],
+                           list) {
                u64 offset;
                int cached;
 
@@ -4436,23 +4821,22 @@ checks:
                        goto loop;
                }
 
-               if (exclude_nr > 0 &&
-                   (search_start + num_bytes > exclude_start &&
-                    search_start < exclude_start + exclude_nr)) {
-                       search_start = exclude_start + exclude_nr;
+               ins->objectid = search_start;
+               ins->offset = num_bytes;
+
+               if (offset < search_start)
+                       btrfs_add_free_space(block_group, offset,
+                                            search_start - offset);
+               BUG_ON(offset > search_start);
 
+               ret = update_reserved_bytes(block_group, num_bytes, 1,
+                                           (data & BTRFS_BLOCK_GROUP_DATA));
+               if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
-                       /*
-                        * if search_start is still in this block group
-                        * then we just re-search this block group
-                        */
-                       if (search_start >= block_group->key.objectid &&
-                           search_start < (block_group->key.objectid +
-                                           block_group->key.offset))
-                               goto have_block_group;
                        goto loop;
                }
 
+               /* we are all good, let's return */
                ins->objectid = search_start;
                ins->offset = num_bytes;
 
@@ -4460,18 +4844,18 @@ checks:
                        btrfs_add_free_space(block_group, offset,
                                             search_start - offset);
                BUG_ON(offset > search_start);
-
-               update_reserved_extents(block_group, num_bytes, 1);
-
-               /* we are all good, lets return */
                break;
 loop:
                failed_cluster_refill = false;
                failed_alloc = false;
+               BUG_ON(index != get_block_group_index(block_group));
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
 
+       if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
+               goto search;
+
        /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait
         *                      for them to make caching progress.  Also
         *                      determine the best possible bg to cache
@@ -4485,6 +4869,7 @@ loop:
        if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
            (found_uncached_bg || empty_size || empty_cluster ||
             allowed_chunk_alloc)) {
+               index = 0;
                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
                        loop++;
@@ -4567,31 +4952,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups)
 {
        struct btrfs_block_group_cache *cache;
+       int index = 0;
 
        spin_lock(&info->lock);
        printk(KERN_INFO "space_info has %llu free, is %sfull\n",
               (unsigned long long)(info->total_bytes - info->bytes_used -
                                    info->bytes_pinned - info->bytes_reserved -
-                                   info->bytes_super),
+                                   info->bytes_readonly),
               (info->full) ? "" : "not ");
-       printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
-              " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
-              "\n",
+       printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
+              "reserved=%llu, may_use=%llu, readonly=%llu\n",
               (unsigned long long)info->total_bytes,
+              (unsigned long long)info->bytes_used,
               (unsigned long long)info->bytes_pinned,
-              (unsigned long long)info->bytes_delalloc,
+              (unsigned long long)info->bytes_reserved,
               (unsigned long long)info->bytes_may_use,
-              (unsigned long long)info->bytes_used,
-              (unsigned long long)info->bytes_root,
-              (unsigned long long)info->bytes_super,
-              (unsigned long long)info->bytes_reserved);
+              (unsigned long long)info->bytes_readonly);
        spin_unlock(&info->lock);
 
        if (!dump_block_groups)
                return;
 
        down_read(&info->groups_sem);
-       list_for_each_entry(cache, &info->block_groups, list) {
+again:
+       list_for_each_entry(cache, &info->block_groups[index], list) {
                spin_lock(&cache->lock);
                printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
                       "%llu pinned %llu reserved\n",
@@ -4603,6 +4987,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                btrfs_dump_free_space(cache, bytes);
                spin_unlock(&cache->lock);
        }
+       if (++index < BTRFS_NR_RAID_TYPES)
+               goto again;
        up_read(&info->groups_sem);
 }
 
@@ -4628,9 +5014,8 @@ again:
 
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
-                              search_start, search_end, hint_byte, ins,
-                              trans->alloc_exclude_start,
-                              trans->alloc_exclude_nr, data);
+                              search_start, search_end, hint_byte,
+                              ins, data);
 
        if (ret == -ENOSPC && num_bytes > min_alloc_size) {
                num_bytes = num_bytes >> 1;
@@ -4668,7 +5053,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
        ret = btrfs_discard_extent(root, start, len);
 
        btrfs_add_free_space(cache, start, len);
-       update_reserved_extents(cache, len, 0);
+       update_reserved_bytes(cache, len, 0, 1);
        btrfs_put_block_group(cache);
 
        return ret;
@@ -4731,8 +5116,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       ret = update_block_group(trans, root, ins->objectid, ins->offset,
-                                1, 0);
+       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) {
                printk(KERN_ERR "btrfs update block group failed for %llu "
                       "%llu\n", (unsigned long long)ins->objectid,
@@ -4792,8 +5176,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       ret = update_block_group(trans, root, ins->objectid, ins->offset,
-                                1, 0);
+       ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) {
                printk(KERN_ERR "btrfs update block group failed for %llu "
                       "%llu\n", (unsigned long long)ins->objectid,
@@ -4869,73 +5252,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                put_caching_control(caching_ctl);
        }
 
-       update_reserved_extents(block_group, ins->offset, 1);
+       ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
+       BUG_ON(ret);
        btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
        return ret;
 }
 
-/*
- * finds a free extent and does all the dirty work required for allocation
- * returns the key for the extent through ins, and a tree buffer for
- * the first block of the extent through buf.
- *
- * returns 0 if everything worked, non-zero otherwise.
- */
-static int alloc_tree_block(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           u64 num_bytes, u64 parent, u64 root_objectid,
-                           struct btrfs_disk_key *key, int level,
-                           u64 empty_size, u64 hint_byte, u64 search_end,
-                           struct btrfs_key *ins)
-{
-       int ret;
-       u64 flags = 0;
-
-       ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
-                                  empty_size, hint_byte, search_end,
-                                  ins, 0);
-       if (ret)
-               return ret;
-
-       if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
-               if (parent == 0)
-                       parent = ins->objectid;
-               flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
-       } else
-               BUG_ON(parent > 0);
-
-       if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-               struct btrfs_delayed_extent_op *extent_op;
-               extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
-               BUG_ON(!extent_op);
-               if (key)
-                       memcpy(&extent_op->key, key, sizeof(extent_op->key));
-               else
-                       memset(&extent_op->key, 0, sizeof(extent_op->key));
-               extent_op->flags_to_set = flags;
-               extent_op->update_key = 1;
-               extent_op->update_flags = 1;
-               extent_op->is_data = 0;
-
-               ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
-                                       ins->offset, parent, root_objectid,
-                                       level, BTRFS_ADD_DELAYED_EXTENT,
-                                       extent_op);
-               BUG_ON(ret);
-       }
-
-       if (root_objectid == root->root_key.objectid) {
-               u64 used;
-               spin_lock(&root->node_lock);
-               used = btrfs_root_used(&root->root_item) + num_bytes;
-               btrfs_set_root_used(&root->root_item, used);
-               spin_unlock(&root->node_lock);
-       }
-       return ret;
-}
-
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
                                            struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize,
@@ -4974,8 +5298,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        return buf;
 }
 
+static struct btrfs_block_rsv *
+use_block_rsv(struct btrfs_trans_handle *trans,
+             struct btrfs_root *root, u32 blocksize)
+{
+       struct btrfs_block_rsv *block_rsv;
+       int ret;
+
+       block_rsv = get_block_rsv(trans, root);
+
+       if (block_rsv->size == 0) {
+               ret = reserve_metadata_bytes(block_rsv, blocksize);
+               if (ret)
+                       return ERR_PTR(ret);
+               return block_rsv;
+       }
+
+       ret = block_rsv_use_bytes(block_rsv, blocksize);
+       if (!ret)
+               return block_rsv;
+
+       WARN_ON(1);
+       printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
+               block_rsv->size, block_rsv->reserved,
+               block_rsv->freed[0], block_rsv->freed[1]);
+
+       return ERR_PTR(-ENOSPC);
+}
+
+static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
+{
+       block_rsv_add_bytes(block_rsv, blocksize, 0);
+       block_rsv_release_bytes(block_rsv, NULL, 0);
+}
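use_block_rsv() either consumes from a pre-sized reservation or, when the reservation has no size yet, reserves fresh metadata space on the spot. block_rsv_use_bytes() itself is introduced in an earlier hunk of this series; its assumed shape, for reference:

/* assumed shape -- the real helper lives in an earlier hunk */
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved >= num_bytes) {
		block_rsv->reserved -= num_bytes;
		ret = 0;
	}
	spin_unlock(&block_rsv->lock);
	return ret;
}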
+
 /*
- * helper function to allocate a block for a given tree
+ * finds a free extent and does all the dirty work required for allocating
+ * a new tree block.
+ *
  * returns the tree buffer or an ERR_PTR on failure.
  */
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
@@ -4985,18 +5346,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                                        u64 hint, u64 empty_size)
 {
        struct btrfs_key ins;
-       int ret;
+       struct btrfs_block_rsv *block_rsv;
        struct extent_buffer *buf;
+       u64 flags = 0;
+       int ret;
 
-       ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
-                              key, level, empty_size, hint, (u64)-1, &ins);
+       block_rsv = use_block_rsv(trans, root, blocksize);
+       if (IS_ERR(block_rsv))
+               return ERR_CAST(block_rsv);
+
+       ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
+                                  empty_size, hint, (u64)-1, &ins, 0);
        if (ret) {
-               BUG_ON(ret > 0);
+               unuse_block_rsv(block_rsv, blocksize);
                return ERR_PTR(ret);
        }
 
        buf = btrfs_init_new_buffer(trans, root, ins.objectid,
                                    blocksize, level);
+       BUG_ON(IS_ERR(buf));
+
+       if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
+               if (parent == 0)
+                       parent = ins.objectid;
+               flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+       } else
+               BUG_ON(parent > 0);
+
+       if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
+               struct btrfs_delayed_extent_op *extent_op;
+               extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+               BUG_ON(!extent_op);
+               if (key)
+                       memcpy(&extent_op->key, key, sizeof(extent_op->key));
+               else
+                       memset(&extent_op->key, 0, sizeof(extent_op->key));
+               extent_op->flags_to_set = flags;
+               extent_op->update_key = 1;
+               extent_op->update_flags = 1;
+               extent_op->is_data = 0;
+
+               ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
+                                       ins.offset, parent, root_objectid,
+                                       level, BTRFS_ADD_DELAYED_EXTENT,
+                                       extent_op);
+               BUG_ON(ret);
+       }
        return buf;
 }
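Note that the extent tree is no longer touched at allocation time: the recorded key and flags travel with the delayed ref and are applied when it is eventually run. For reference, the extent_op's shape in this era (per delayed-ref.h):

struct btrfs_delayed_extent_op {
	struct btrfs_disk_key key;	/* first key of the new block */
	u64 flags_to_set;		/* e.g. FULL_BACKREF for relocation */
	unsigned int update_key:1;
	unsigned int update_flags:1;
	unsigned int is_data:1;
};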
 
@@ -5321,7 +5717,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                                 struct btrfs_path *path,
                                 struct walk_control *wc)
 {
-       int ret = 0;
+       int ret;
        int level = wc->level;
        struct extent_buffer *eb = path->nodes[level];
        u64 parent = 0;
@@ -5399,13 +5795,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                               btrfs_header_owner(path->nodes[level + 1]));
        }
 
-       ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
-                               root->root_key.objectid, level, 0);
-       BUG_ON(ret);
+       btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
 out:
        wc->refs[level] = 0;
        wc->flags[level] = 0;
-       return ret;
+       return 0;
 }
 
 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
@@ -5483,7 +5877,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * also make sure backrefs for the shared block and all lower level
  * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
+int btrfs_drop_snapshot(struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv, int update_ref)
 {
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans;
@@ -5501,7 +5896,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
        wc = kzalloc(sizeof(*wc), GFP_NOFS);
        BUG_ON(!wc);
 
-       trans = btrfs_start_transaction(tree_root, 1);
+       trans = btrfs_start_transaction(tree_root, 0);
+       if (block_rsv)
+               trans->block_rsv = block_rsv;
 
        if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
                level = btrfs_header_level(root->node);
@@ -5589,22 +5986,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
                }
 
                BUG_ON(wc->level == 0);
-               if (trans->transaction->in_commit ||
-                   trans->transaction->delayed_refs.flushing) {
+               if (btrfs_should_end_transaction(trans, tree_root)) {
                        ret = btrfs_update_root(trans, tree_root,
                                                &root->root_key,
                                                root_item);
                        BUG_ON(ret);
 
-                       btrfs_end_transaction(trans, tree_root);
-                       trans = btrfs_start_transaction(tree_root, 1);
-               } else {
-                       unsigned long update;
-                       update = trans->delayed_ref_updates;
-                       trans->delayed_ref_updates = 0;
-                       if (update)
-                               btrfs_run_delayed_refs(trans, tree_root,
-                                                      update);
+                       btrfs_end_transaction_throttle(trans, tree_root);
+                       trans = btrfs_start_transaction(tree_root, 0);
+                       if (block_rsv)
+                               trans->block_rsv = block_rsv;
                }
        }
        btrfs_release_path(root, path);
@@ -5632,7 +6023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
                kfree(root);
        }
 out:
-       btrfs_end_transaction(trans, tree_root);
+       btrfs_end_transaction_throttle(trans, tree_root);
        kfree(wc);
        btrfs_free_path(path);
        return err;
@@ -7228,48 +7619,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static int __alloc_chunk_for_shrink(struct btrfs_root *root,
-                    struct btrfs_block_group_cache *shrink_block_group,
-                    int force)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 {
-       struct btrfs_trans_handle *trans;
-       u64 new_alloc_flags;
-       u64 calc;
+       struct btrfs_space_info *sinfo = cache->space_info;
+       u64 num_bytes;
+       int ret = -ENOSPC;
 
-       spin_lock(&shrink_block_group->lock);
-       if (btrfs_block_group_used(&shrink_block_group->item) +
-           shrink_block_group->reserved > 0) {
-               spin_unlock(&shrink_block_group->lock);
+       if (cache->ro)
+               return 0;
 
-               trans = btrfs_start_transaction(root, 1);
-               spin_lock(&shrink_block_group->lock);
+       spin_lock(&sinfo->lock);
+       spin_lock(&cache->lock);
+       num_bytes = cache->key.offset - cache->reserved - cache->pinned -
+                   cache->bytes_super - btrfs_block_group_used(&cache->item);
+
+       if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
+           sinfo->bytes_may_use + sinfo->bytes_readonly +
+           cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
+               sinfo->bytes_readonly += num_bytes;
+               sinfo->bytes_reserved += cache->reserved_pinned;
+               cache->reserved_pinned = 0;
+               cache->ro = 1;
+               ret = 0;
+       }
+       spin_unlock(&cache->lock);
+       spin_unlock(&sinfo->lock);
+       return ret;
+}
 
-               new_alloc_flags = update_block_group_flags(root,
-                                                  shrink_block_group->flags);
-               if (new_alloc_flags != shrink_block_group->flags) {
-                       calc =
-                            btrfs_block_group_used(&shrink_block_group->item);
-               } else {
-                       calc = shrink_block_group->key.offset;
-               }
-               spin_unlock(&shrink_block_group->lock);
+int btrfs_set_block_group_ro(struct btrfs_root *root,
+                            struct btrfs_block_group_cache *cache)
 
-               do_chunk_alloc(trans, root->fs_info->extent_root,
-                              calc + 2 * 1024 * 1024, new_alloc_flags, force);
+{
+       struct btrfs_trans_handle *trans;
+       u64 alloc_flags;
+       int ret;
 
-               btrfs_end_transaction(trans, root);
-       } else
-               spin_unlock(&shrink_block_group->lock);
-       return 0;
-}
+       BUG_ON(cache->ro);
 
+       trans = btrfs_join_transaction(root, 1);
+       BUG_ON(IS_ERR(trans));
 
-int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
-                                        struct btrfs_block_group_cache *group)
+       alloc_flags = update_block_group_flags(root, cache->flags);
+       if (alloc_flags != cache->flags)
+               do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+
+       ret = set_block_group_ro(cache);
+       if (!ret)
+               goto out;
+       alloc_flags = get_alloc_profile(root, cache->space_info->flags);
+       ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+       if (ret < 0)
+               goto out;
+       ret = set_block_group_ro(cache);
+out:
+       btrfs_end_transaction(trans, root);
+       return ret;
+}
 
+int btrfs_set_block_group_rw(struct btrfs_root *root,
+                             struct btrfs_block_group_cache *cache)
 {
-       __alloc_chunk_for_shrink(root, group, 1);
-       set_block_group_readonly(group);
+       struct btrfs_space_info *sinfo = cache->space_info;
+       u64 num_bytes;
+
+       BUG_ON(!cache->ro);
+
+       spin_lock(&sinfo->lock);
+       spin_lock(&cache->lock);
+       num_bytes = cache->key.offset - cache->reserved - cache->pinned -
+                   cache->bytes_super - btrfs_block_group_used(&cache->item);
+       sinfo->bytes_readonly -= num_bytes;
+       cache->ro = 0;
+       spin_unlock(&cache->lock);
+       spin_unlock(&sinfo->lock);
        return 0;
 }
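A group may go read-only only when everything the space_info has already promised, plus the group's unused bytes (which become readonly), still fits strictly under total_bytes. The admission check from set_block_group_ro() isolated as a sketch, locks again assumed held:

static bool ro_admission_ok(struct btrfs_space_info *sinfo,
			    struct btrfs_block_group_cache *cache)
{
	u64 num_bytes = cache->key.offset - cache->reserved -
			cache->pinned - cache->bytes_super -
			btrfs_block_group_used(&cache->item);

	return sinfo->bytes_used + sinfo->bytes_reserved +
	       sinfo->bytes_pinned + sinfo->bytes_may_use +
	       sinfo->bytes_readonly + cache->reserved_pinned +
	       num_bytes < sinfo->total_bytes;
}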
 
@@ -7436,17 +7859,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
         */
        synchronize_rcu();
 
+       release_global_block_rsv(info);
+
        while(!list_empty(&info->space_info)) {
                space_info = list_entry(info->space_info.next,
                                        struct btrfs_space_info,
                                        list);
-
+               if (space_info->bytes_pinned > 0 ||
+                   space_info->bytes_reserved > 0) {
+                       WARN_ON(1);
+                       dump_space_info(space_info, 0, 0);
+               }
                list_del(&space_info->list);
                kfree(space_info);
        }
        return 0;
 }
 
+static void __link_block_group(struct btrfs_space_info *space_info,
+                              struct btrfs_block_group_cache *cache)
+{
+       int index = get_block_group_index(cache);
+
+       down_write(&space_info->groups_sem);
+       list_add_tail(&cache->list, &space_info->block_groups[index]);
+       up_write(&space_info->groups_sem);
+}
+
 int btrfs_read_block_groups(struct btrfs_root *root)
 {
        struct btrfs_path *path;
@@ -7468,10 +7907,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
        while (1) {
                ret = find_first_block_group(root, path, &key);
-               if (ret > 0) {
-                       ret = 0;
-                       goto error;
-               }
+               if (ret > 0)
+                       break;
                if (ret != 0)
                        goto error;
 
@@ -7480,7 +7917,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache = kzalloc(sizeof(*cache), GFP_NOFS);
                if (!cache) {
                        ret = -ENOMEM;
-                       break;
+                       goto error;
                }
 
                atomic_set(&cache->count, 1);
@@ -7537,20 +7974,36 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                BUG_ON(ret);
                cache->space_info = space_info;
                spin_lock(&cache->space_info->lock);
-               cache->space_info->bytes_super += cache->bytes_super;
+               cache->space_info->bytes_readonly += cache->bytes_super;
                spin_unlock(&cache->space_info->lock);
 
-               down_write(&space_info->groups_sem);
-               list_add_tail(&cache->list, &space_info->block_groups);
-               up_write(&space_info->groups_sem);
+               __link_block_group(space_info, cache);
 
                ret = btrfs_add_block_group_cache(root->fs_info, cache);
                BUG_ON(ret);
 
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid))
-                       set_block_group_readonly(cache);
+                       set_block_group_ro(cache);
+       }
+
+       list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
+               if (!(get_alloc_profile(root, space_info->flags) &
+                     (BTRFS_BLOCK_GROUP_RAID10 |
+                      BTRFS_BLOCK_GROUP_RAID1 |
+                      BTRFS_BLOCK_GROUP_DUP)))
+                       continue;
+               /*
+                * avoid allocating from un-mirrored block group if there are
+                * mirrored block groups.
+                */
+               list_for_each_entry(cache, &space_info->block_groups[3], list)
+                       set_block_group_ro(cache);
+               list_for_each_entry(cache, &space_info->block_groups[4], list)
+                       set_block_group_ro(cache);
        }
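The bare 3 and 4 above are the RAID0 and single lists from get_block_group_index(); forcing those groups read-only keeps the allocator on mirrored space whenever the profile provides any. Hypothetical named constants would make the intent self-documenting:

/* hypothetical names -- this series uses bare indexes */
#define BTRFS_BG_RAID0_LIST	3	/* per get_block_group_index() */
#define BTRFS_BG_SINGLE_LIST	4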
+
+       init_global_block_rsv(info);
        ret = 0;
 error:
        btrfs_free_path(path);
@@ -7611,12 +8064,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        spin_lock(&cache->space_info->lock);
-       cache->space_info->bytes_super += cache->bytes_super;
+       cache->space_info->bytes_readonly += cache->bytes_super;
        spin_unlock(&cache->space_info->lock);
 
-       down_write(&cache->space_info->groups_sem);
-       list_add_tail(&cache->list, &cache->space_info->block_groups);
-       up_write(&cache->space_info->groups_sem);
+       __link_block_group(cache->space_info, cache);
 
        ret = btrfs_add_block_group_cache(root->fs_info, cache);
        BUG_ON(ret);
index d2d0368..a4080c2 100644
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
        return state;
 }
 
-static void free_extent_state(struct extent_state *state)
+void free_extent_state(struct extent_state *state)
 {
        if (!state)
                return;
@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
 }
 
 static int set_state_cb(struct extent_io_tree *tree,
-                        struct extent_state *state,
-                        unsigned long bits)
+                        struct extent_state *state, int *bits)
 {
        if (tree->ops && tree->ops->set_bit_hook) {
                return tree->ops->set_bit_hook(tree->mapping->host,
-                                              state->start, state->end,
-                                              state->state, bits);
+                                              state, bits);
        }
 
        return 0;
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
-                          struct extent_state *state,
-                          unsigned long bits)
+                          struct extent_state *state, int *bits)
 {
        if (tree->ops && tree->ops->clear_bit_hook)
                tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
  */
 static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
-                       int bits)
+                       int *bits)
 {
        struct rb_node *node;
+       int bits_to_set = *bits & ~EXTENT_CTLBITS;
        int ret;
 
        if (end < start) {
@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
        if (ret)
                return ret;
 
-       if (bits & EXTENT_DIRTY)
+       if (bits_to_set & EXTENT_DIRTY)
                tree->dirty_bytes += end - start + 1;
-       state->state |= bits;
+       state->state |= bits_to_set;
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
  * struct is freed and removed from the tree
  */
 static int clear_state_bit(struct extent_io_tree *tree,
-                           struct extent_state *state, int bits, int wake,
-                           int delete)
+                           struct extent_state *state,
+                           int *bits, int wake)
 {
-       int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
+       int bits_to_clear = *bits & ~EXTENT_CTLBITS;
        int ret = state->state & bits_to_clear;
 
-       if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+       if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
                WARN_ON(range > tree->dirty_bytes);
                tree->dirty_bytes -= range;
@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
        state->state &= ~bits_to_clear;
        if (wake)
                wake_up(&state->wq);
-       if (delete || state->state == 0) {
+       if (state->state == 0) {
                if (state->tree) {
-                       clear_state_cb(tree, state, state->state);
                        rb_erase(&state->rb_node, &tree->state);
                        state->tree = NULL;
                        free_extent_state(state);
@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int set = 0;
        int clear = 0;
 
+       if (delete)
+               bits |= ~EXTENT_CTLBITS;
+       bits |= EXTENT_FIRST_DELALLOC;
+
        if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
                clear = 1;
 again:
@@ -580,8 +581,7 @@ hit_next:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       set |= clear_state_bit(tree, state, bits, wake,
-                                              delete);
+                       set |= clear_state_bit(tree, state, &bits, wake);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
@@ -602,7 +602,7 @@ hit_next:
                if (wake)
                        wake_up(&state->wq);
 
-               set |= clear_state_bit(tree, prealloc, bits, wake, delete);
+               set |= clear_state_bit(tree, prealloc, &bits, wake);
 
                prealloc = NULL;
                goto out;
@@ -613,7 +613,7 @@ hit_next:
        else
                next_node = NULL;
 
-       set |= clear_state_bit(tree, state, bits, wake, delete);
+       set |= clear_state_bit(tree, state, &bits, wake);
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
@@ -706,19 +706,19 @@ out:
 
 static int set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state,
-                          int bits)
+                          int *bits)
 {
        int ret;
+       int bits_to_set = *bits & ~EXTENT_CTLBITS;
 
        ret = set_state_cb(tree, state, bits);
        if (ret)
                return ret;
-
-       if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
+       if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
                tree->dirty_bytes += range;
        }
-       state->state |= bits;
+       state->state |= bits_to_set;
 
        return 0;
 }
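Passing the bits by pointer lets the hooks observe and consume control flags before they ever reach state->state; EXTENT_CTLBITS (DO_ACCOUNTING | FIRST_DELALLOC) is masked off on both the set and clear paths. A sketch of a set_bit_hook using the new contract, following the inode.c side of this series (the accounting helper is hypothetical):

static int set_bit_hook_sketch(struct inode *inode,
			       struct extent_state *state, int *bits)
{
	if (!(*bits & EXTENT_DELALLOC))
		return 0;

	if (*bits & EXTENT_FIRST_DELALLOC)
		*bits &= ~EXTENT_FIRST_DELALLOC; /* first range: consume flag */
	else
		account_outstanding_extent(inode); /* hypothetical helper */

	return 0;
}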
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state,
  * [start, end] is inclusive. This takes the tree lock.
  */
 
-static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                         int bits, int exclusive_bits, u64 *failed_start,
-                         struct extent_state **cached_state,
-                         gfp_t mask)
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int exclusive_bits, u64 *failed_start,
+                  struct extent_state **cached_state, gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
@@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        u64 last_start;
        u64 last_end;
 
+       bits |= EXTENT_FIRST_DELALLOC;
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
@@ -778,7 +778,7 @@ again:
         */
        node = tree_search(tree, start);
        if (!node) {
-               err = insert_state(tree, prealloc, start, end, bits);
+               err = insert_state(tree, prealloc, start, end, &bits);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
                goto out;
@@ -802,7 +802,7 @@ hit_next:
                        goto out;
                }
 
-               err = set_state_bits(tree, state, bits);
+               err = set_state_bits(tree, state, &bits);
                if (err)
                        goto out;
 
@@ -852,7 +852,7 @@ hit_next:
                if (err)
                        goto out;
                if (state->end <= end) {
-                       err = set_state_bits(tree, state, bits);
+                       err = set_state_bits(tree, state, &bits);
                        if (err)
                                goto out;
                        cache_state(state, cached_state);
@@ -877,7 +877,7 @@ hit_next:
                else
                        this_end = last_start - 1;
                err = insert_state(tree, prealloc, start, this_end,
-                                  bits);
+                                  &bits);
                BUG_ON(err == -EEXIST);
                if (err) {
                        prealloc = NULL;
@@ -903,7 +903,7 @@ hit_next:
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
 
-               err = set_state_bits(tree, prealloc, bits);
+               err = set_state_bits(tree, prealloc, &bits);
                if (err) {
                        prealloc = NULL;
                        goto out;
@@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 {
        return clear_extent_bit(tree, start, end,
                                EXTENT_DIRTY | EXTENT_DELALLOC |
-                               EXTENT_DO_ACCOUNTING, 0, 0,
-                               NULL, mask);
+                               EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
 }
 
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        if (op & EXTENT_CLEAR_DELALLOC)
                clear_bits |= EXTENT_DELALLOC;
 
-       if (op & EXTENT_CLEAR_ACCOUNTING)
-               clear_bits |= EXTENT_DO_ACCOUNTING;
-
        clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
        if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
                    EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
@@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 
        if (tree->ops && tree->ops->submit_bio_hook)
                tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
-                                          mirror_num, bio_flags);
+                                          mirror_num, bio_flags, start);
        else
                submit_bio(rw, bio);
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
@@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
+       struct btrfs_ordered_extent *ordered;
        int ret;
        int nr = 0;
        size_t page_offset = 0;
@@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        set_page_extent_mapped(page);
 
        end = page_end;
-       lock_extent(tree, start, end, GFP_NOFS);
+       while (1) {
+               lock_extent(tree, start, end, GFP_NOFS);
+               ordered = btrfs_lookup_ordered_extent(inode, start);
+               if (!ordered)
+                       break;
+               unlock_extent(tree, start, end, GFP_NOFS);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_put_ordered_extent(ordered);
+       }
 
        if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
                char *userpage;
index bbab481..5691c7b 100644
@@ -16,7 +16,9 @@
 #define EXTENT_BOUNDARY (1 << 9)
 #define EXTENT_NODATASUM (1 << 10)
 #define EXTENT_DO_ACCOUNTING (1 << 11)
+#define EXTENT_FIRST_DELALLOC (1 << 12)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
 /* flags for bio submission */
 #define EXTENT_BIO_COMPRESSED 1
@@ -47,7 +49,7 @@ struct extent_state;
 
 typedef        int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
                                       struct bio *bio, int mirror_num,
-                                      unsigned long bio_flags);
+                                      unsigned long bio_flags, u64 bio_offset);
 struct extent_io_ops {
        int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
                             u64 start, u64 end, int *page_started,
@@ -69,10 +71,10 @@ struct extent_io_ops {
                                    struct extent_state *state);
        int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
                                      struct extent_state *state, int uptodate);
-       int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
-                           unsigned long old, unsigned long bits);
+       int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
+                           int *bits);
        int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
-                             unsigned long bits);
+                             int *bits);
        int (*merge_extent_hook)(struct inode *inode,
                                 struct extent_state *new,
                                 struct extent_state *other);
@@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end,
                     u64 max_bytes, unsigned long bits);
 
+void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   int bits, int filled, struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask);
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  int bits, int exclusive_bits, u64 *failed_start,
+                  struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
index 54a2550..a562a25 100644
@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 }
 
 
-int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
-                         struct bio *bio, u32 *dst)
+static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
+                                  struct inode *inode, struct bio *bio,
+                                  u64 logical_offset, u32 *dst, int dio)
 {
        u32 sum;
        struct bio_vec *bvec = bio->bi_io_vec;
        int bio_index = 0;
-       u64 offset;
+       u64 offset = 0;
        u64 item_start_offset = 0;
        u64 item_last_offset = 0;
        u64 disk_bytenr;
@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
        WARN_ON(bio->bi_vcnt <= 0);
 
        disk_bytenr = (u64)bio->bi_sector << 9;
+       if (dio)
+               offset = logical_offset;
        while (bio_index < bio->bi_vcnt) {
-               offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+               if (!dio)
+                       offset = page_offset(bvec->bv_page) + bvec->bv_offset;
                ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
                if (ret == 0)
                        goto found;
@@ -238,6 +242,7 @@ found:
                else
                        set_state_private(io_tree, offset, sum);
                disk_bytenr += bvec->bv_len;
+               offset += bvec->bv_len;
                bio_index++;
                bvec++;
        }
@@ -245,6 +250,18 @@ found:
        return 0;
 }
 
+int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
+                         struct bio *bio, u32 *dst)
+{
+       return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
+}
+
+int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
+                             struct bio *bio, u64 offset, u32 *dst)
+{
+       return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+}
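For direct I/O the bio carries user pages, so page_offset(bvec->bv_page) is meaningless; the caller passes the file's logical offset instead, and it advances by bv_len per vector. A usage sketch from a hypothetical dio completion path (the new bio_offset argument to the submit hook, earlier in this patch, is what feeds it):

/* file_offset is the bio's logical file position, not page based */
ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset, csums);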
+
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                             struct list_head *list)
 {
@@ -657,6 +674,9 @@ again:
                goto found;
        }
        ret = PTR_ERR(item);
+       if (ret != -EFBIG && ret != -ENOENT)
+               goto fail_unlock;
+
        if (ret == -EFBIG) {
                u32 item_size;
                /* we found one, but it isn't big enough yet */
index 29ff749..79437c5 100644
 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                                         int write_bytes,
                                         struct page **prepared_pages,
-                                        const char __user *buf)
+                                        struct iov_iter *i)
 {
-       long page_fault = 0;
-       int i;
+       size_t copied;
+       int pg = 0;
        int offset = pos & (PAGE_CACHE_SIZE - 1);
 
-       for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
+       while (write_bytes > 0) {
                size_t count = min_t(size_t,
                                     PAGE_CACHE_SIZE - offset, write_bytes);
-               struct page *page = prepared_pages[i];
-               fault_in_pages_readable(buf, count);
+               struct page *page = prepared_pages[pg];
+again:
+               if (unlikely(iov_iter_fault_in_readable(i, count)))
+                       return -EFAULT;
 
                /* Copy data from userspace to the current page */
-               kmap(page);
-               page_fault = __copy_from_user(page_address(page) + offset,
-                                             buf, count);
+               copied = iov_iter_copy_from_user(page, i, offset, count);
+
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
-               kunmap(page);
-               buf += count;
-               write_bytes -= count;
+               iov_iter_advance(i, copied);
+               write_bytes -= copied;
 
-               if (page_fault)
-                       break;
+               if (unlikely(copied == 0)) {
+                       count = min_t(size_t, PAGE_CACHE_SIZE - offset,
+                                     iov_iter_single_seg_count(i));
+                       goto again;
+               }
+
+               if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
+                       offset += copied;
+               } else {
+                       pg++;
+                       offset = 0;
+               }
        }
-       return page_fault ? -EFAULT : 0;
+       return 0;
 }
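The rewrite follows the standard iov_iter copy loop of this era (compare generic_perform_write() in mm/filemap.c): fault the user pages in, copy, and when a copy returns zero bytes narrow the request to the first iovec segment and retry, guaranteeing forward progress instead of failing the whole write. Condensed to its skeleton:

while (write_bytes > 0) {
	if (iov_iter_fault_in_readable(i, count))
		return -EFAULT;		/* user memory truly unreadable */
	copied = iov_iter_copy_from_user(page, i, offset, count);
	iov_iter_advance(i, copied);
	if (copied == 0)		/* faulted mid-copy: narrow and retry */
		count = min_t(size_t, count, iov_iter_single_seg_count(i));
	write_bytes -= copied;
}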
 
 /*
@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
                                        NULL);
-       if (err)
-               return err;
+       BUG_ON(err);
 
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                 * at this time.
                 */
        }
-       return err;
+       return 0;
 }
 
 /*
@@ -823,45 +832,46 @@ again:
        return 0;
 }
 
-static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
-                               size_t count, loff_t *ppos)
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+                                   const struct iovec *iov,
+                                   unsigned long nr_segs, loff_t pos)
 {
-       loff_t pos;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = fdentry(file)->d_inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct page *pinned[2];
+       struct page **pages = NULL;
+       struct iov_iter i;
+       loff_t *ppos = &iocb->ki_pos;
        loff_t start_pos;
        ssize_t num_written = 0;
        ssize_t err = 0;
+       size_t count;
+       size_t ocount;
        int ret = 0;
-       struct inode *inode = fdentry(file)->d_inode;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct page **pages = NULL;
        int nrptrs;
-       struct page *pinned[2];
        unsigned long first_index;
        unsigned long last_index;
        int will_write;
+       int buffered = 0;
 
        will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                      (file->f_flags & O_DIRECT));
 
-       nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
-                    PAGE_CACHE_SIZE / (sizeof(struct page *)));
        pinned[0] = NULL;
        pinned[1] = NULL;
 
-       pos = *ppos;
        start_pos = pos;
 
        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
-       /* do the reserve before the mutex lock in case we have to do some
-        * flushing.  We wouldn't deadlock, but this is more polite.
-        */
-       err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
-       if (err)
-               goto out_nolock;
-
        mutex_lock(&inode->i_mutex);
 
+       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+       if (err)
+               goto out;
+       count = ocount;
+
        current->backing_dev_info = inode->i_mapping->backing_dev_info;
        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
        if (err)
@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                goto out;
 
        file_update_time(file);
+       BTRFS_I(inode)->sequence++;
+
+       if (unlikely(file->f_flags & O_DIRECT)) {
+               num_written = generic_file_direct_write(iocb, iov, &nr_segs,
+                                                       pos, ppos, count,
+                                                       ocount);
+               /*
+                * the generic O_DIRECT path updates the in-memory i_size after
+                * the DIOs are done, but our endio handlers that update the
+                * on-disk i_size never update past the in-memory i_size.  So
+                * we need one more update here to catch any additions to the
+                * file.
+                */
+               if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+                       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+                       mark_inode_dirty(inode);
+               }
 
+               if (num_written < 0) {
+                       ret = num_written;
+                       num_written = 0;
+                       goto out;
+               } else if (num_written == count) {
+                       /* pick up pos changes done by the generic code */
+                       pos = *ppos;
+                       goto out;
+               }
+               /*
+                * We are going to do buffered I/O for the rest of the range,
+                * so we need to make sure to invalidate the buffered pages
+                * when we're done.
+                */
+               buffered = 1;
+               pos += num_written;
+       }
+
+       iov_iter_init(&i, iov, nr_segs, count, num_written);
+       nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+                    PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
+                    (sizeof(struct page *)));
        pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
 
        /* generic_write_checks can change our pos */
        start_pos = pos;
 
-       BTRFS_I(inode)->sequence++;
        first_index = pos >> PAGE_CACHE_SHIFT;
-       last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+       last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
 
        /*
         * there are lots of better ways to do this, but this code
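[The O_DIRECT branch above is the usual mixed-write fallback: when generic_file_direct_write() returns short, the tail of the range goes through the page cache, and because buffered is set, the cached pages are invalidated afterwards so a later direct read cannot see stale data. The shape of that flow, with hypothetical stand-ins for the three I/O helpers:]

/*
 * Sketch only: buffered fallback after a short direct write.  The three
 * helpers are toy stand-ins, not btrfs or VFS functions; direct_write()
 * here pretends to always write half of what was asked.
 */
#include <sys/types.h>

static ssize_t direct_write(off_t pos, const char *buf, size_t count)
{ (void)pos; (void)buf; return (ssize_t)(count / 2); }
static ssize_t buffered_write(off_t pos, const char *buf, size_t count)
{ (void)pos; (void)buf; return (ssize_t)count; }
static void invalidate_range(off_t start, size_t len)
{ (void)start; (void)len; }

static ssize_t mixed_write(off_t pos, const char *buf, size_t count)
{
        off_t start_pos = pos;
        ssize_t direct, rest;

        direct = direct_write(pos, buf, count);
        if (direct < 0 || (size_t)direct == count)
                return direct;          /* hard error, or fully written */

        /* short direct write: push the remainder through the page cache */
        rest = buffered_write(pos + direct, buf + direct, count - direct);
        if (rest < 0)
                return direct;          /* report the part that made it */

        /*
         * drop the pages the buffered tail dirtied, as the code above
         * does with invalidate_mapping_pages() once buffered == 1
         */
        invalidate_range(start_pos, (size_t)(direct + rest));
        return direct + rest;
}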
@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                        unlock_page(pinned[0]);
                }
        }
-       if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
+       if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
                pinned[1] = grab_cache_page(inode->i_mapping, last_index);
                if (!PageUptodate(pinned[1])) {
                        ret = btrfs_readpage(NULL, pinned[1]);
@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                }
        }
 
-       while (count > 0) {
+       while (iov_iter_count(&i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-               size_t write_bytes = min(count, nrptrs *
-                                       (size_t)PAGE_CACHE_SIZE -
+               size_t write_bytes = min(iov_iter_count(&i),
+                                        nrptrs * (size_t)PAGE_CACHE_SIZE -
                                         offset);
                size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
                                        PAGE_CACHE_SHIFT;
@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                WARN_ON(num_pages > nrptrs);
                memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-               ret = btrfs_check_data_free_space(root, inode, write_bytes);
+               ret = btrfs_delalloc_reserve_space(inode, write_bytes);
                if (ret)
                        goto out;
 
@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                                    pos, first_index, last_index,
                                    write_bytes);
                if (ret) {
-                       btrfs_free_reserved_data_space(root, inode,
-                                                      write_bytes);
+                       btrfs_delalloc_release_space(inode, write_bytes);
                        goto out;
                }
 
                ret = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, buf);
-               if (ret) {
-                       btrfs_free_reserved_data_space(root, inode,
-                                                      write_bytes);
-                       btrfs_drop_pages(pages, num_pages);
-                       goto out;
+                                          write_bytes, pages, &i);
+               if (ret == 0) {
+                       dirty_and_release_pages(NULL, root, file, pages,
+                                               num_pages, pos, write_bytes);
                }
 
-               ret = dirty_and_release_pages(NULL, root, file, pages,
-                                             num_pages, pos, write_bytes);
                btrfs_drop_pages(pages, num_pages);
                if (ret) {
-                       btrfs_free_reserved_data_space(root, inode,
-                                                      write_bytes);
+                       btrfs_delalloc_release_space(inode, write_bytes);
                        goto out;
                }
 
@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
                        btrfs_throttle(root);
                }
 
-               buf += write_bytes;
-               count -= write_bytes;
                pos += write_bytes;
                num_written += write_bytes;
 
@@ -976,9 +1016,7 @@ out:
        mutex_unlock(&inode->i_mutex);
        if (ret)
                err = ret;
-       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
 
-out_nolock:
        kfree(pages);
        if (pinned[0])
                page_cache_release(pinned[0]);
@@ -1008,7 +1046,7 @@ out_nolock:
                        num_written = err;
 
                if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-                       trans = btrfs_start_transaction(root, 1);
+                       trans = btrfs_start_transaction(root, 0);
                        ret = btrfs_log_dentry_safe(trans, root,
                                                    file->f_dentry);
                        if (ret == 0) {
@@ -1023,7 +1061,7 @@ out_nolock:
                                btrfs_end_transaction(trans, root);
                        }
                }
-               if (file->f_flags & O_DIRECT) {
+               if (file->f_flags & O_DIRECT && buffered) {
                        invalidate_mapping_pages(inode->i_mapping,
                              start_pos >> PAGE_CACHE_SHIFT,
                             (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
@@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
        if (file && file->private_data)
                btrfs_ioctl_trans_end(file);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
                goto out;
        }
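[This hunk is part of a conversion that runs through the whole series: btrfs_start_transaction() now reports failure by encoding an errno in the returned pointer rather than returning NULL, so callers switch from !trans checks to IS_ERR()/PTR_ERR(). The encoding itself, reproduced from the kernel's convention in plain C with a toy start_transaction():]

/*
 * The ERR_PTR convention: the top MAX_ERRNO values of the address space
 * never hold real objects, so an errno can travel in the pointer itself
 * and one return value covers both outcomes.
 */
#include <errno.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct trans;                           /* opaque for the sketch */

static struct trans *start_transaction(int fail)
{
        return fail ? ERR_PTR(-ENOSPC) : (struct trans *)4096;
}

static long caller(void)
{
        struct trans *t = start_transaction(1);

        if (IS_ERR(t))
                return PTR_ERR(t);      /* not: if (!t) return -ENOMEM; */
        /* ... use t ... */
        return 0;
}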
 
@@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .splice_read    = generic_file_splice_read,
-       .write          = btrfs_file_write,
+       .aio_write      = btrfs_file_aio_write,
        .mmap           = btrfs_file_mmap,
        .open           = generic_file_open,
        .release        = btrfs_release_file,
index 72ce3c1..64f1150 100644
@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name,
        return 0;
 }
 
+struct btrfs_inode_ref *
+btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct btrfs_path *path,
+                       const char *name, int name_len,
+                       u64 inode_objectid, u64 ref_objectid, int mod)
+{
+       struct btrfs_key key;
+       struct btrfs_inode_ref *ref;
+       int ins_len = mod < 0 ? -1 : 0;
+       int cow = mod != 0;
+       int ret;
+
+       key.objectid = inode_objectid;
+       key.type = BTRFS_INODE_REF_KEY;
+       key.offset = ref_objectid;
+
+       ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret > 0)
+               return NULL;
+       if (!find_name_in_backref(path, name, name_len, &ref))
+               return NULL;
+       return ref;
+}
+
 int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           const char *name, int name_len,
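[Note the three-way contract of the new btrfs_lookup_inode_ref(): an ERR_PTR for a failed search, NULL for a clean miss (key or name not found), and a valid pointer on success, with the path still positioned on the item. A caller-side sketch of that contract, using a hypothetical lookup() and local copies of the ERR_PTR macros:]

/* Sketch: handling an ERR_PTR / NULL / pointer triple return. */
#include <errno.h>

#define MAX_ERRNO 4095
#define IS_ERR(p)  ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)
#define PTR_ERR(p) ((long)(p))

struct ref;

static struct ref *lookup(void) { return 0; }   /* always a clean miss here */

static long use_ref(void)
{
        struct ref *ref = lookup();

        if (IS_ERR(ref))
                return PTR_ERR(ref);    /* the search itself failed */
        if (!ref)
                return -ENOENT;         /* searched fine, nothing there */
        /* ... read fields out of ref while the path pins it ... */
        return 0;
}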
index d601629..fa6ccc1 100644
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                                   inline_len, compressed_size,
                                   compressed_pages);
        BUG_ON(ret);
+       btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
        return 0;
 }
@@ -414,6 +415,7 @@ again:
                trans = btrfs_join_transaction(root, 1);
                BUG_ON(!trans);
                btrfs_set_trans_block_group(trans, inode);
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
                /* lets try to make an inline extent */
                if (ret || total_in < (actual_end - start)) {
@@ -439,7 +441,6 @@ again:
                             start, end, NULL,
                             EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
                             EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_ACCOUNTING |
                             EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
 
                        btrfs_end_transaction(trans, root);
@@ -697,6 +698,38 @@ retry:
        return 0;
 }
 
+static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+                                     u64 num_bytes)
+{
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
+       u64 alloc_hint = 0;
+
+       read_lock(&em_tree->lock);
+       em = search_extent_mapping(em_tree, start, num_bytes);
+       if (em) {
+               /*
+                * if block start isn't an actual block number then find the
+                * first block in this inode and use that as a hint.  If that
+                * block is also bogus then just don't worry about it.
+                */
+               if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+                       free_extent_map(em);
+                       em = search_extent_mapping(em_tree, 0, 0);
+                       if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
+                               alloc_hint = em->block_start;
+                       if (em)
+                               free_extent_map(em);
+               } else {
+                       alloc_hint = em->block_start;
+                       free_extent_map(em);
+               }
+       }
+       read_unlock(&em_tree->lock);
+
+       return alloc_hint;
+}
+
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code.  The basic idea is to
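[get_extent_allocation_hint(), added above, factors out a fallback chain: prefer a real on-disk address for the mapping at start; if that mapping carries a sentinel (hole or inline, i.e. block_start >= EXTENT_MAP_LAST_BYTE), fall back to the file's first real mapping; otherwise give no hint. The chain in miniature, with hypothetical find_map()/is_real() and the reference drops omitted:]

/* Sketch of the hint fallback chain; find_map()/is_real() are toys. */
typedef unsigned long long u64;

struct map { u64 block_start; };

static struct map *find_map(u64 start) { (void)start; return 0; }
static int is_real(const struct map *m) { (void)m; return 0; }

static u64 alloc_hint(u64 start)
{
        u64 hint = 0;
        struct map *m = find_map(start);

        if (m && is_real(m)) {
                hint = m->block_start;  /* best case: hint near start */
        } else if (m) {
                m = find_map(0);        /* sentinel: try the file start */
                if (m && is_real(m))
                        hint = m->block_start;
        }
        return hint;
}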
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode,
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(!trans);
        btrfs_set_trans_block_group(trans, inode);
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        actual_end = min_t(u64, isize, end + 1);
 
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode,
                                     EXTENT_CLEAR_UNLOCK_PAGE |
                                     EXTENT_CLEAR_UNLOCK |
                                     EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_ACCOUNTING |
                                     EXTENT_CLEAR_DIRTY |
                                     EXTENT_SET_WRITEBACK |
                                     EXTENT_END_WRITEBACK);
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode,
        BUG_ON(disk_num_bytes >
               btrfs_super_total_bytes(&root->fs_info->super_copy));
 
-
-       read_lock(&BTRFS_I(inode)->extent_tree.lock);
-       em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
-                                  start, num_bytes);
-       if (em) {
-               /*
-                * if block start isn't an actual block number then find the
-                * first block in this inode and use that as a hint.  If that
-                * block is also bogus then just don't worry about it.
-                */
-               if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-                       free_extent_map(em);
-                       em = search_extent_mapping(em_tree, 0, 0);
-                       if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
-                               alloc_hint = em->block_start;
-                       if (em)
-                               free_extent_map(em);
-               } else {
-                       alloc_hint = em->block_start;
-                       free_extent_map(em);
-               }
-       }
-       read_unlock(&BTRFS_I(inode)->extent_tree.lock);
+       alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
        while (disk_num_bytes > 0) {
@@ -1174,6 +1185,13 @@ out_check:
                                               num_bytes, num_bytes, type);
                BUG_ON(ret);
 
+               if (root->root_key.objectid ==
+                   BTRFS_DATA_RELOC_TREE_OBJECTID) {
+                       ret = btrfs_reloc_clone_csums(inode, cur_offset,
+                                                     num_bytes);
+                       BUG_ON(ret);
+               }
+
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                cur_offset, cur_offset + num_bytes - 1,
                                locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 }
 
 static int btrfs_split_extent_hook(struct inode *inode,
-                                   struct extent_state *orig, u64 split)
+                                  struct extent_state *orig, u64 split)
 {
+       /* not delalloc, ignore it */
        if (!(orig->state & EXTENT_DELALLOC))
                return 0;
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       BTRFS_I(inode)->outstanding_extents++;
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
        return 0;
 }
 
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return 0;
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       BTRFS_I(inode)->outstanding_extents--;
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
        return 0;
 }
 
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
  * bytes in this file, and to maintain the list of inodes that
  * have pending delalloc work to be done.
  */
-static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
-                      unsigned long old, unsigned long bits)
+static int btrfs_set_bit_hook(struct inode *inode,
+                             struct extent_state *state, int *bits)
 {
 
        /*
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
         * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
-       if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+       if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
+               u64 len = state->end + 1 - state->start;
 
-               spin_lock(&BTRFS_I(inode)->accounting_lock);
-               BTRFS_I(inode)->outstanding_extents++;
-               spin_unlock(&BTRFS_I(inode)->accounting_lock);
-               btrfs_delalloc_reserve_space(root, inode, end - start + 1);
+               if (*bits & EXTENT_FIRST_DELALLOC)
+                       *bits &= ~EXTENT_FIRST_DELALLOC;
+               else
+                       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 
                spin_lock(&root->fs_info->delalloc_lock);
-               BTRFS_I(inode)->delalloc_bytes += end - start + 1;
-               root->fs_info->delalloc_bytes += end - start + 1;
+               BTRFS_I(inode)->delalloc_bytes += len;
+               root->fs_info->delalloc_bytes += len;
                if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
                                      &root->fs_info->delalloc_inodes);
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
  * extent_io.c clear_bit_hook, see set_bit_hook for why
  */
 static int btrfs_clear_bit_hook(struct inode *inode,
-                               struct extent_state *state, unsigned long bits)
+                               struct extent_state *state, int *bits)
 {
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
         * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
-       if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+       if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
+               u64 len = state->end + 1 - state->start;
 
-               if (bits & EXTENT_DO_ACCOUNTING) {
-                       spin_lock(&BTRFS_I(inode)->accounting_lock);
-                       WARN_ON(!BTRFS_I(inode)->outstanding_extents);
-                       BTRFS_I(inode)->outstanding_extents--;
-                       spin_unlock(&BTRFS_I(inode)->accounting_lock);
-                       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
-               }
+               if (*bits & EXTENT_FIRST_DELALLOC)
+                       *bits &= ~EXTENT_FIRST_DELALLOC;
+               else if (!(*bits & EXTENT_DO_ACCOUNTING))
+                       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+               if (*bits & EXTENT_DO_ACCOUNTING)
+                       btrfs_delalloc_release_metadata(inode, len);
+
+               if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+                       btrfs_free_reserved_data_space(inode, len);
 
                spin_lock(&root->fs_info->delalloc_lock);
-               if (state->end - state->start + 1 >
-                   root->fs_info->delalloc_bytes) {
-                       printk(KERN_INFO "btrfs warning: delalloc account "
-                              "%llu %llu\n",
-                              (unsigned long long)
-                              state->end - state->start + 1,
-                              (unsigned long long)
-                              root->fs_info->delalloc_bytes);
-                       btrfs_delalloc_free_space(root, inode, (u64)-1);
-                       root->fs_info->delalloc_bytes = 0;
-                       BTRFS_I(inode)->delalloc_bytes = 0;
-               } else {
-                       btrfs_delalloc_free_space(root, inode,
-                                                 state->end -
-                                                 state->start + 1);
-                       root->fs_info->delalloc_bytes -= state->end -
-                               state->start + 1;
-                       BTRFS_I(inode)->delalloc_bytes -= state->end -
-                               state->start + 1;
-               }
+               root->fs_info->delalloc_bytes -= len;
+               BTRFS_I(inode)->delalloc_bytes -= len;
+
                if (BTRFS_I(inode)->delalloc_bytes == 0 &&
                    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_del_init(&BTRFS_I(inode)->delalloc_inodes);
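[The split/merge/set/clear hooks above drop the accounting_lock around outstanding_extents in favor of a bare atomic. That is sound here because every site is a blind increment or decrement; nothing reads the counter and acts on the value inside the old critical section. The same move in C11 atomics:]

/*
 * Sketch: a lock-free event counter standing in for the kernel's
 * atomic_t outstanding_extents.  Valid only while all users are blind
 * inc/dec; a read-modify-decide would still need a lock or cmpxchg.
 */
#include <stdatomic.h>

static atomic_int outstanding_extents;

static void split_hook(void) { atomic_fetch_add(&outstanding_extents, 1); }
static void merge_hook(void) { atomic_fetch_sub(&outstanding_extents, 1); }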
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  */
 static int __btrfs_submit_bio_start(struct inode *inode, int rw,
                                    struct bio *bio, int mirror_num,
-                                   unsigned long bio_flags)
+                                   unsigned long bio_flags,
+                                   u64 bio_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
  * are inserted into the btree
  */
 static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
-                         int mirror_num, unsigned long bio_flags)
+                         int mirror_num, unsigned long bio_flags,
+                         u64 bio_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
  * on write, or reading the csums from the tree before a read
  */
 static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-                         int mirror_num, unsigned long bio_flags)
+                         int mirror_num, unsigned long bio_flags,
+                         u64 bio_offset)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                /* we're doing a write, do the async checksumming */
                return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num,
-                                  bio_flags, __btrfs_submit_bio_start,
+                                  bio_flags, bio_offset,
+                                  __btrfs_submit_bio_start,
                                   __btrfs_submit_bio_done);
        }
 
@@ -1520,6 +1525,7 @@ again:
                goto again;
        }
 
+       BUG();
        btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
        ClearPageChecked(page);
 out:
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
+       struct btrfs_trans_handle *trans = NULL;
        struct btrfs_ordered_extent *ordered_extent = NULL;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_state *cached_state = NULL;
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                if (!ret) {
                        trans = btrfs_join_transaction(root, 1);
+                       btrfs_set_trans_block_group(trans, inode);
+                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
                        BUG_ON(ret);
-                       btrfs_end_transaction(trans, root);
                }
                goto out;
        }
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                         0, &cached_state, GFP_NOFS);
 
        trans = btrfs_join_transaction(root, 1);
+       btrfs_set_trans_block_group(trans, inode);
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
                compressed = 1;
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       /* this also removes the ordered extent from the tree */
        btrfs_ordered_update_i_size(inode, 0, ordered_extent);
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
-       btrfs_end_transaction(trans, root);
 out:
+       btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+       if (trans)
+               btrfs_end_transaction(trans, root);
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
        /* once for the tree */
@@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 
        BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
                                                      failrec->last_mirror,
-                                                     failrec->bio_flags);
+                                                     failrec->bio_flags, 0);
        return 0;
 }
 
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 }
 
 /*
+ * calculate the extra metadata reservation needed when snapshotting a
+ * subvolume that contains orphan files.
+ */
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending,
+                               u64 *bytes_to_reserve)
+{
+       struct btrfs_root *root;
+       struct btrfs_block_rsv *block_rsv;
+       u64 num_bytes;
+       int index;
+
+       root = pending->root;
+       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+               return;
+
+       block_rsv = root->orphan_block_rsv;
+
+       /* orphan block reservation for the snapshot */
+       num_bytes = block_rsv->size;
+
+       /*
+        * after the snapshot is created, COWing tree blocks may use more
+        * space than it frees. So we should make sure there is enough
+        * reserved space.
+        */
+       index = trans->transid & 0x1;
+       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+               num_bytes += block_rsv->size -
+                            (block_rsv->reserved + block_rsv->freed[index]);
+       }
+
+       *bytes_to_reserve += num_bytes;
+}
+
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+                               struct btrfs_pending_snapshot *pending)
+{
+       struct btrfs_root *root = pending->root;
+       struct btrfs_root *snap = pending->snap;
+       struct btrfs_block_rsv *block_rsv;
+       u64 num_bytes;
+       int index;
+       int ret;
+
+       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+               return;
+
+       /* refill source subvolume's orphan block reservation */
+       block_rsv = root->orphan_block_rsv;
+       index = trans->transid & 0x1;
+       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+               num_bytes = block_rsv->size -
+                           (block_rsv->reserved + block_rsv->freed[index]);
+               ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+                                             root->orphan_block_rsv,
+                                             num_bytes);
+               BUG_ON(ret);
+       }
+
+       /* setup orphan block reservation for the snapshot */
+       block_rsv = btrfs_alloc_block_rsv(snap);
+       BUG_ON(!block_rsv);
+
+       btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
+       snap->orphan_block_rsv = block_rsv;
+
+       num_bytes = root->orphan_block_rsv->size;
+       ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+                                     block_rsv, num_bytes);
+       BUG_ON(ret);
+
+#if 0
+       /* insert orphan item for the snapshot */
+       WARN_ON(!root->orphan_item_inserted);
+       ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+                                      snap->root_key.objectid);
+       BUG_ON(ret);
+       snap->orphan_item_inserted = 1;
+#endif
+}
+
+enum btrfs_orphan_cleanup_state {
+       ORPHAN_CLEANUP_STARTED  = 1,
+       ORPHAN_CLEANUP_DONE     = 2,
+};
+
+/*
+ * This is called at transaction commit time.  If there are no orphan
+ * files in the subvolume, it removes the orphan item and frees the
+ * block_rsv structure.
+ */
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root)
+{
+       int ret;
+
+       if (!list_empty(&root->orphan_list) ||
+           root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
+               return;
+
+       if (root->orphan_item_inserted &&
+           btrfs_root_refs(&root->root_item) > 0) {
+               ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
+                                           root->root_key.objectid);
+               BUG_ON(ret);
+               root->orphan_item_inserted = 0;
+       }
+
+       if (root->orphan_block_rsv) {
+               WARN_ON(root->orphan_block_rsv->size > 0);
+               btrfs_free_block_rsv(root, root->orphan_block_rsv);
+               root->orphan_block_rsv = NULL;
+       }
+}
+
+/*
  * This creates an orphan entry for the given inode in case something goes
  * wrong in the middle of an unlink/truncate.
+ *
+ * NOTE: the caller of this function should reserve 5 units of metadata
+ *      for it.
  */
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret = 0;
+       struct btrfs_block_rsv *block_rsv = NULL;
+       int reserve = 0;
+       int insert = 0;
+       int ret;
+
+       if (!root->orphan_block_rsv) {
+               block_rsv = btrfs_alloc_block_rsv(root);
+               BUG_ON(!block_rsv);
+       }
 
-       spin_lock(&root->list_lock);
+       spin_lock(&root->orphan_lock);
+       if (!root->orphan_block_rsv) {
+               root->orphan_block_rsv = block_rsv;
+       } else if (block_rsv) {
+               btrfs_free_block_rsv(root, block_rsv);
+               block_rsv = NULL;
+       }
 
-       /* already on the orphan list, we're good */
-       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+#if 0
+               /*
+                * For proper ENOSPC handling, we should do orphan
+                * cleanup when mounting.  But this introduces a backward
+                * compatibility issue.
+                */
+               if (!xchg(&root->orphan_item_inserted, 1))
+                       insert = 2;
+               else
+                       insert = 1;
+#endif
+               insert = 1;
+       } else {
+               WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
        }
 
-       list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+       if (!BTRFS_I(inode)->orphan_meta_reserved) {
+               BTRFS_I(inode)->orphan_meta_reserved = 1;
+               reserve = 1;
+       }
+       spin_unlock(&root->orphan_lock);
 
-       spin_unlock(&root->list_lock);
+       if (block_rsv)
+               btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
 
-       /*
-        * insert an orphan item to track this unlinked/truncated file
-        */
-       ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+       /* grab metadata reservation from transaction handle */
+       if (reserve) {
+               ret = btrfs_orphan_reserve_metadata(trans, inode);
+               BUG_ON(ret);
+       }
 
-       return ret;
+       /* insert an orphan item to track this unlinked/truncated file */
+       if (insert >= 1) {
+               ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+               BUG_ON(ret);
+       }
+
+       /* insert an orphan item to record that the subvolume has orphan files */
+       if (insert >= 2) {
+               ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+                                              root->root_key.objectid);
+               BUG_ON(ret);
+       }
+       return 0;
 }
 
 /*
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       int delete_item = 0;
+       int release_rsv = 0;
        int ret = 0;
 
-       spin_lock(&root->list_lock);
-
-       if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       spin_lock(&root->orphan_lock);
+       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+               list_del_init(&BTRFS_I(inode)->i_orphan);
+               delete_item = 1;
        }
 
-       list_del_init(&BTRFS_I(inode)->i_orphan);
-       if (!trans) {
-               spin_unlock(&root->list_lock);
-               return 0;
+       if (BTRFS_I(inode)->orphan_meta_reserved) {
+               BTRFS_I(inode)->orphan_meta_reserved = 0;
+               release_rsv = 1;
        }
+       spin_unlock(&root->orphan_lock);
 
-       spin_unlock(&root->list_lock);
+       if (trans && delete_item) {
+               ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+               BUG_ON(ret);
+       }
 
-       ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+       if (release_rsv)
+               btrfs_orphan_release_metadata(inode);
 
-       return ret;
+       return 0;
 }
 
 /*
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
        struct inode *inode;
        int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
-       if (!xchg(&root->clean_orphans, 0))
+       if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
                return;
 
        path = btrfs_alloc_path();
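[The cmpxchg() above turns orphan cleanup into a one-shot latch: whichever task first moves orphan_cleanup_state from 0 to ORPHAN_CLEANUP_STARTED runs the scan, every other caller returns at once, and the function later parks the state at ORPHAN_CLEANUP_DONE. The same latch in C11 atomics:]

/* Sketch: one-shot work latch via compare-and-swap, mirroring
 * cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED). */
#include <stdatomic.h>

enum { IDLE, STARTED, DONE };           /* IDLE must be 0, as above */

static atomic_int state = IDLE;

static void cleanup_once(void)
{
        int expected = IDLE;

        if (!atomic_compare_exchange_strong(&state, &expected, STARTED))
                return;                 /* someone else won the race */

        /* ... scan for and delete orphan items ... */

        atomic_store(&state, DONE);
}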
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-               if (IS_ERR(inode))
-                       break;
+               BUG_ON(IS_ERR(inode));
 
                /*
                 * add this inode to the orphan list so btrfs_orphan_del does
                 * the proper thing when we hit it
                 */
-               spin_lock(&root->list_lock);
+               spin_lock(&root->orphan_lock);
                list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-               spin_unlock(&root->list_lock);
+               spin_unlock(&root->orphan_lock);
 
                /*
                 * if this is a bad inode, means we actually succeeded in
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 * do a destroy_inode
                 */
                if (is_bad_inode(inode)) {
-                       trans = btrfs_start_transaction(root, 1);
+                       trans = btrfs_start_transaction(root, 0);
                        btrfs_orphan_del(trans, inode);
                        btrfs_end_transaction(trans, root);
                        iput(inode);
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                /* this will do delete_inode and everything for us */
                iput(inode);
        }
+       btrfs_free_path(path);
+
+       root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
+
+       if (root->orphan_block_rsv)
+               btrfs_block_rsv_release(root, root->orphan_block_rsv,
+                                       (u64)-1);
+
+       if (root->orphan_block_rsv || root->orphan_item_inserted) {
+               trans = btrfs_join_transaction(root, 1);
+               btrfs_end_transaction(trans, root);
+       }
 
        if (nr_unlink)
                printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
        if (nr_truncate)
                printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
-       btrfs_free_path(path);
 }
 
 /*
@@ -2478,103 +2666,276 @@ out:
        return ret;
 }
 
-static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+/* helper to check if there is any shared block in the path */
+static int check_path_shared(struct btrfs_root *root,
+                            struct btrfs_path *path)
 {
-       struct btrfs_root *root;
-       struct btrfs_trans_handle *trans;
-       struct inode *inode = dentry->d_inode;
+       struct extent_buffer *eb;
+       int level;
        int ret;
-       unsigned long nr = 0;
-
-       root = BTRFS_I(dir)->root;
-
-       /*
-        * 5 items for unlink inode
-        * 1 for orphan
-        */
-       ret = btrfs_reserve_metadata_space(root, 6);
-       if (ret)
-               return ret;
+       u64 refs;
 
-       trans = btrfs_start_transaction(root, 1);
-       if (IS_ERR(trans)) {
-               btrfs_unreserve_metadata_space(root, 6);
-               return PTR_ERR(trans);
+       for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+               if (!path->nodes[level])
+                       break;
+               eb = path->nodes[level];
+               if (!btrfs_block_can_be_shared(root, eb))
+                       continue;
+               ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+                                              &refs, NULL);
+               if (refs > 1)
+                       return 1;
        }
-
-       btrfs_set_trans_block_group(trans, dir);
-
-       btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
-
-       ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
-                                dentry->d_name.name, dentry->d_name.len);
-
-       if (inode->i_nlink == 0)
-               ret = btrfs_orphan_add(trans, inode);
-
-       nr = trans->blocks_used;
-
-       btrfs_end_transaction_throttle(trans, root);
-       btrfs_unreserve_metadata_space(root, 6);
-       btrfs_btree_balance_dirty(root, nr);
-       return ret;
+       return 0;
 }
 
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root,
-                       struct inode *dir, u64 objectid,
-                       const char *name, int name_len)
+/*
+ * helper to start a transaction for unlink and rmdir.
+ *
+ * unlink and rmdir are special in btrfs: they do not always free space,
+ * so in the ENOSPC case we should make sure they will free space before
+ * allowing them to use the global metadata reservation.
+ */
+static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
+                                                      struct dentry *dentry)
 {
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_path *path;
-       struct extent_buffer *leaf;
+       struct btrfs_inode_ref *ref;
        struct btrfs_dir_item *di;
-       struct btrfs_key key;
+       struct inode *inode = dentry->d_inode;
        u64 index;
+       int check_link = 1;
+       int err = -ENOSPC;
        int ret;
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       trans = btrfs_start_transaction(root, 10);
+       if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+               return trans;
 
-       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
-                                  name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
+       if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               return ERR_PTR(-ENOSPC);
 
-       leaf = path->nodes[0];
-       btrfs_dir_item_key_to_cpu(leaf, di, &key);
-       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
-       BUG_ON(ret);
-       btrfs_release_path(root, path);
+       /* check if someone else holds a reference */
+       if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+               return ERR_PTR(-ENOSPC);
 
-       ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
-                                objectid, root->root_key.objectid,
-                                dir->i_ino, &index, name, name_len);
-       if (ret < 0) {
-               BUG_ON(ret != -ENOENT);
-               di = btrfs_search_dir_index_item(root, path, dir->i_ino,
-                                                name, name_len);
-               BUG_ON(!di || IS_ERR(di));
+       if (atomic_read(&inode->i_count) > 2)
+               return ERR_PTR(-ENOSPC);
 
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               btrfs_release_path(root, path);
-               index = key.offset;
+       if (xchg(&root->fs_info->enospc_unlink, 1))
+               return ERR_PTR(-ENOSPC);
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               root->fs_info->enospc_unlink = 0;
+               return ERR_PTR(-ENOMEM);
        }
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
-                                        index, name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
+               root->fs_info->enospc_unlink = 0;
+               return trans;
+       }
 
-       leaf = path->nodes[0];
-       btrfs_dir_item_key_to_cpu(leaf, di, &key);
-       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
-       BUG_ON(ret);
-       btrfs_release_path(root, path);
+       path->skip_locking = 1;
+       path->search_commit_root = 1;
 
-       btrfs_i_size_write(dir, dir->i_size - name_len * 2);
-       dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+       ret = btrfs_lookup_inode(trans, root, path,
+                               &BTRFS_I(dir)->location, 0);
+       if (ret < 0) {
+               err = ret;
+               goto out;
+       }
+       if (ret == 0) {
+               if (check_path_shared(root, path))
+                       goto out;
+       } else {
+               check_link = 0;
+       }
+       btrfs_release_path(root, path);
+
+       ret = btrfs_lookup_inode(trans, root, path,
+                               &BTRFS_I(inode)->location, 0);
+       if (ret < 0) {
+               err = ret;
+               goto out;
+       }
+       if (ret == 0) {
+               if (check_path_shared(root, path))
+                       goto out;
+       } else {
+               check_link = 0;
+       }
+       btrfs_release_path(root, path);
+
+       if (ret == 0 && S_ISREG(inode->i_mode)) {
+               ret = btrfs_lookup_file_extent(trans, root, path,
+                                              inode->i_ino, (u64)-1, 0);
+               if (ret < 0) {
+                       err = ret;
+                       goto out;
+               }
+               BUG_ON(ret == 0);
+               if (check_path_shared(root, path))
+                       goto out;
+               btrfs_release_path(root, path);
+       }
+
+       if (!check_link) {
+               err = 0;
+               goto out;
+       }
+
+       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+                               dentry->d_name.name, dentry->d_name.len, 0);
+       if (IS_ERR(di)) {
+               err = PTR_ERR(di);
+               goto out;
+       }
+       if (di) {
+               if (check_path_shared(root, path))
+                       goto out;
+       } else {
+               err = 0;
+               goto out;
+       }
+       btrfs_release_path(root, path);
+
+       ref = btrfs_lookup_inode_ref(trans, root, path,
+                               dentry->d_name.name, dentry->d_name.len,
+                               inode->i_ino, dir->i_ino, 0);
+       if (IS_ERR(ref)) {
+               err = PTR_ERR(ref);
+               goto out;
+       }
+       BUG_ON(!ref);
+       if (check_path_shared(root, path))
+               goto out;
+       index = btrfs_inode_ref_index(path->nodes[0], ref);
+       btrfs_release_path(root, path);
+
+       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
+                               dentry->d_name.name, dentry->d_name.len, 0);
+       if (IS_ERR(di)) {
+               err = PTR_ERR(di);
+               goto out;
+       }
+       BUG_ON(ret == -ENOENT);
+       if (check_path_shared(root, path))
+               goto out;
+
+       err = 0;
+out:
+       btrfs_free_path(path);
+       if (err) {
+               btrfs_end_transaction(trans, root);
+               root->fs_info->enospc_unlink = 0;
+               return ERR_PTR(err);
+       }
+
+       trans->block_rsv = &root->fs_info->global_block_rsv;
+       return trans;
+}
+
+static void __unlink_end_trans(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root)
+{
+       if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+               BUG_ON(!root->fs_info->enospc_unlink);
+               root->fs_info->enospc_unlink = 0;
+       }
+       btrfs_end_transaction_throttle(trans, root);
+}
+
+static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+       struct btrfs_trans_handle *trans;
+       struct inode *inode = dentry->d_inode;
+       int ret;
+       unsigned long nr = 0;
+
+       trans = __unlink_start_trans(dir, dentry);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       btrfs_set_trans_block_group(trans, dir);
+
+       btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+
+       ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+                                dentry->d_name.name, dentry->d_name.len);
+       BUG_ON(ret);
+
+       if (inode->i_nlink == 0) {
+               ret = btrfs_orphan_add(trans, inode);
+               BUG_ON(ret);
+       }
+
+       nr = trans->blocks_used;
+       __unlink_end_trans(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+       return ret;
+}
+
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct inode *dir, u64 objectid,
+                       const char *name, int name_len)
+{
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       u64 index;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+                                  name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+                                objectid, root->root_key.objectid,
+                                dir->i_ino, &index, name, name_len);
+       if (ret < 0) {
+               BUG_ON(ret != -ENOENT);
+               di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+                                                name, name_len);
+               BUG_ON(!di || IS_ERR(di));
+
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               btrfs_release_path(root, path);
+               index = key.offset;
+       }
+
+       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+                                        index, name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+       dir->i_mtime = dir->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
        dir->i_sb->s_dirt = 1;
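[__unlink_start_trans() above treats xchg(&root->fs_info->enospc_unlink, 1) as a try-lock: only one unlink or rmdir at a time may probe its paths and borrow the global reservation, and a loser of the race simply reports -ENOSPC. A C11 rendering of that gate:]

/* Sketch: xchg as a try-lock on the ENOSPC fallback path, mirroring
 * the enospc_unlink flag above. */
#include <stdatomic.h>
#include <errno.h>

static atomic_int enospc_unlink;

static int enter_enospc_path(void)
{
        if (atomic_exchange(&enospc_unlink, 1))
                return -ENOSPC;         /* old value 1: already taken */
        return 0;                       /* we flipped 0 -> 1, we own it */
}

static void leave_enospc_path(void)
{
        atomic_store(&enospc_unlink, 0);
}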
@@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
        int err = 0;
-       int ret;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
        unsigned long nr = 0;
@@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
            inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
                return -ENOTEMPTY;
 
-       ret = btrfs_reserve_metadata_space(root, 5);
-       if (ret)
-               return ret;
-
-       trans = btrfs_start_transaction(root, 1);
-       if (IS_ERR(trans)) {
-               btrfs_unreserve_metadata_space(root, 5);
+       trans = __unlink_start_trans(dir, dentry);
+       if (IS_ERR(trans))
                return PTR_ERR(trans);
-       }
 
        btrfs_set_trans_block_group(trans, dir);
 
@@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
                btrfs_i_size_write(inode, 0);
 out:
        nr = trans->blocks_used;
-       ret = btrfs_end_transaction_throttle(trans, root);
-       btrfs_unreserve_metadata_space(root, 5);
+       __unlink_end_trans(trans, root);
        btrfs_btree_balance_dirty(root, nr);
 
-       if (ret && !err)
-               err = ret;
        return err;
 }
 
@@ -3029,6 +3380,7 @@ out:
        if (pending_del_nr) {
                ret = btrfs_del_items(trans, root, path, pending_del_slot,
                                      pending_del_nr);
+               BUG_ON(ret);
        }
        btrfs_free_path(path);
        return err;
@@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        if ((offset & (blocksize - 1)) == 0)
                goto out;
-       ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-       if (ret)
-               goto out;
-
-       ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+       ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
        if (ret)
                goto out;
 
@@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 again:
        page = grab_cache_page(mapping, index);
        if (!page) {
-               btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-               btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
        }
 
@@ -3132,8 +3479,7 @@ again:
 
 out_unlock:
        if (ret)
-               btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+               btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
        unlock_page(page);
        page_cache_release(page);
 out:
@@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       struct extent_map *em;
+       struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        u64 mask = root->sectorsize - 1;
        u64 hole_start = (inode->i_size + mask) & ~mask;
@@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
                        u64 hint_byte = 0;
                        hole_size = last_byte - cur_offset;
 
-                       err = btrfs_reserve_metadata_space(root, 2);
-                       if (err)
+                       trans = btrfs_start_transaction(root, 2);
+                       if (IS_ERR(trans)) {
+                               err = PTR_ERR(trans);
                                break;
-
-                       trans = btrfs_start_transaction(root, 1);
+                       }
                        btrfs_set_trans_block_group(trans, inode);
 
                        err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
                                        last_byte - 1, 0);
 
                        btrfs_end_transaction(trans, root);
-                       btrfs_unreserve_metadata_space(root, 2);
                }
                free_extent_map(em);
+               em = NULL;
                cur_offset = last_byte;
                if (cur_offset >= block_end)
                        break;
        }
 
+       free_extent_map(em);
        unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
                             GFP_NOFS);
        return err;
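[The em = NULL additions to btrfs_cont_expand() above are the free-and-NULL discipline: initialize the pointer, clear it right after each per-iteration free, and put one unconditional free after the loop. Every exit path is then covered exactly once; free_extent_map(), like free(), is a no-op on NULL. A compact illustration with toy helpers:]

/* Sketch of the free-and-NULL pattern; get_mapping()/need_more() are
 * hypothetical stand-ins for the extent-map lookups above. */
#include <stdlib.h>

static void *get_mapping(void) { return malloc(16); }
static int need_more(int n) { return n < 3; }

static void expand(void)
{
        void *em = NULL;
        int n = 0;

        for (;;) {
                em = get_mapping();
                if (!em)
                        break;          /* leaves with em == NULL */
                if (need_more(++n)) {
                        free(em);
                        em = NULL;      /* nothing owned between rounds */
                        continue;
                }
                break;                  /* leaves with em still live */
        }
        free(em);                       /* safe on every exit path */
}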
@@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
                }
        }
 
-       ret = btrfs_reserve_metadata_space(root, 1);
-       if (ret)
-               return ret;
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
 
        ret = btrfs_orphan_add(trans, inode);
@@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
 
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-       btrfs_unreserve_metadata_space(root, 1);
        btrfs_btree_balance_dirty(root, nr);
 
        if (attr->ia_size > inode->i_size) {
@@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
                i_size_write(inode, attr->ia_size);
                btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-               trans = btrfs_start_transaction(root, 1);
+               trans = btrfs_start_transaction(root, 0);
+               BUG_ON(IS_ERR(trans));
                btrfs_set_trans_block_group(trans, inode);
+               trans->block_rsv = root->orphan_block_rsv;
+               BUG_ON(!trans->block_rsv);
 
                ret = btrfs_update_inode(trans, root, inode);
                BUG_ON(ret);
@@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode)
        btrfs_i_size_write(inode, 0);
 
        while (1) {
-               trans = btrfs_start_transaction(root, 1);
+               trans = btrfs_start_transaction(root, 0);
+               BUG_ON(IS_ERR(trans));
                btrfs_set_trans_block_group(trans, inode);
-               ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+               trans->block_rsv = root->orphan_block_rsv;
+
+               ret = btrfs_block_rsv_check(trans, root,
+                                           root->orphan_block_rsv, 0, 5);
+               if (ret) {
+                       BUG_ON(ret != -EAGAIN);
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+                       continue;
+               }
 
+               ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
                if (ret != -EAGAIN)
                        break;
 
@@ -3356,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode)
                btrfs_end_transaction(trans, root);
                trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
+
        }
 
        if (ret == 0) {
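
The loop above is a commit-and-retry pattern: btrfs_block_rsv_check reports a too-small orphan reservation as -EAGAIN, in which case the transaction is committed so pinned space can be reclaimed and the pass is retried; btrfs_truncate_inode_items likewise returns -EAGAIN when it wants the caller to restart with a fresh transaction. A compilable sketch of that control flow (reserve_check, truncate_step and commit are stand-ins, not kernel APIs):

    #include <errno.h>

    static int reserve_tries, trunc_tries;

    static int reserve_check(void)  /* stub: first pass finds the rsv too low */
    {
            return reserve_tries++ < 1 ? -EAGAIN : 0;
    }

    static int truncate_step(void)  /* stub: needs a few passes to finish */
    {
            return trunc_tries++ < 2 ? -EAGAIN : 0;
    }

    static void commit(void) { }    /* stub: would reclaim pinned space */

    int main(void)
    {
            int ret;

            for (;;) {
                    if (reserve_check() == -EAGAIN) {
                            commit();               /* make room, then retry */
                            continue;
                    }
                    ret = truncate_step();
                    if (ret != -EAGAIN)             /* finished, or a hard error */
                            break;
                    commit();                       /* checkpoint and keep going */
            }
            return ret;
    }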
@@ -3596,40 +3956,10 @@ again:
        return 0;
 }
 
-static noinline void init_btrfs_i(struct inode *inode)
-{
-       struct btrfs_inode *bi = BTRFS_I(inode);
-
-       bi->generation = 0;
-       bi->sequence = 0;
-       bi->last_trans = 0;
-       bi->last_sub_trans = 0;
-       bi->logged_trans = 0;
-       bi->delalloc_bytes = 0;
-       bi->reserved_bytes = 0;
-       bi->disk_i_size = 0;
-       bi->flags = 0;
-       bi->index_cnt = (u64)-1;
-       bi->last_unlink_trans = 0;
-       bi->ordered_data_close = 0;
-       bi->force_compress = 0;
-       extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
-       extent_io_tree_init(&BTRFS_I(inode)->io_tree,
-                            inode->i_mapping, GFP_NOFS);
-       extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
-                            inode->i_mapping, GFP_NOFS);
-       INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
-       INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
-       RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
-       btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
-       mutex_init(&BTRFS_I(inode)->log_mutex);
-}
-
 static int btrfs_init_locked_inode(struct inode *inode, void *p)
 {
        struct btrfs_iget_args *args = p;
        inode->i_ino = args->ino;
-       init_btrfs_i(inode);
        BTRFS_I(inode)->root = args->root;
        btrfs_set_inode_space_info(args->root, inode);
        return 0;
@@ -3692,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s,
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
-       init_btrfs_i(inode);
-
        BTRFS_I(inode)->root = root;
        memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
        BTRFS_I(inode)->dummy_inode = 1;
@@ -3950,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        struct btrfs_trans_handle *trans;
        int ret = 0;
 
-       if (root->fs_info->btree_inode == inode)
+       if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -3971,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
+       int ret;
+
+       if (BTRFS_I(inode)->dummy_inode)
+               return;
 
        trans = btrfs_join_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
-       btrfs_update_inode(trans, root, inode);
+
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret == -ENOSPC) {
+               /* whoops, let's try again with the full transaction */
+               btrfs_end_transaction(trans, root);
+               trans = btrfs_start_transaction(root, 1);
+               if (IS_ERR(trans)) {
+                       if (printk_ratelimit()) {
+                               printk(KERN_ERR "btrfs: failed to "
+                                      "dirty inode %lu error %ld\n",
+                                      inode->i_ino, PTR_ERR(trans));
+                       }
+                       return;
+               }
+               btrfs_set_trans_block_group(trans, inode);
+
+               ret = btrfs_update_inode(trans, root, inode);
+               if (ret) {
+                       if (printk_ratelimit()) {
+                               printk(KERN_ERR "btrfs: failed to "
+                                      "dirty inode %lu error %d\n",
+                                      inode->i_ino, ret);
+                       }
+               }
+       }
        btrfs_end_transaction(trans, root);
 }
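
btrfs_dirty_inode above tries the cheap path first, joining the running transaction with no reservation, and only pays for a fully reserved transaction when the update fails with -ENOSPC. The same two-tier fallback as a self-contained sketch (stub functions, not kernel APIs):

    #include <errno.h>
    #include <stdio.h>

    static int update_joined(void)   { return -ENOSPC; } /* stub: nothing reserved */
    static int update_reserved(void) { return 0; }       /* stub: reservation made room */

    int main(void)
    {
            int ret = update_joined();              /* cheap path first */
            if (ret == -ENOSPC)
                    ret = update_reserved();        /* reserved, more expensive path */
            if (ret)
                    fprintf(stderr, "failed to dirty inode: %d\n", ret);
            return 0;
    }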
 
@@ -4092,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
         * btrfs_get_inode_index_count has an explanation for the magic
         * number
         */
-       init_btrfs_i(inode);
        BTRFS_I(inode)->index_cnt = 2;
        BTRFS_I(inode)->root = root;
        BTRFS_I(inode)->generation = trans->transid;
@@ -4247,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (!new_valid_dev(rdev))
                return -EINVAL;
 
+       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+       if (err)
+               return err;
+
        /*
         * 2 for inode item and ref
         * 2 for dir items
         * 1 for xattr if selinux is on
         */
-       err = btrfs_reserve_metadata_space(root, 5);
-       if (err)
-               return err;
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans)
-               goto fail;
        btrfs_set_trans_block_group(trans, dir);
 
-       err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-       if (err) {
-               err = -ENOSPC;
-               goto out_unlock;
-       }
-
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len,
                                dentry->d_parent->d_inode->i_ino, objectid,
@@ -4295,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
-fail:
-       btrfs_unreserve_metadata_space(root, 5);
+       btrfs_btree_balance_dirty(root, nr);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
        }
-       btrfs_btree_balance_dirty(root, nr);
        return err;
 }
 
@@ -4311,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct inode *inode = NULL;
-       int err;
        int drop_inode = 0;
+       int err;
        unsigned long nr = 0;
        u64 objectid;
        u64 index = 0;
 
+       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+       if (err)
+               return err;
+
        /*
         * 2 for inode item and ref
         * 2 for dir items
         * 1 for xattr if selinux is on
         */
-       err = btrfs_reserve_metadata_space(root, 5);
-       if (err)
-               return err;
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans)
-               goto fail;
        btrfs_set_trans_block_group(trans, dir);
 
-       err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-       if (err) {
-               err = -ENOSPC;
-               goto out_unlock;
-       }
-
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len,
                                dentry->d_parent->d_inode->i_ino,
@@ -4368,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
-fail:
-       btrfs_unreserve_metadata_space(root, 5);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
@@ -4396,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (root->objectid != BTRFS_I(inode)->root->objectid)
                return -EPERM;
 
-       /*
-        * 1 item for inode ref
-        * 2 items for dir items
-        */
-       err = btrfs_reserve_metadata_space(root, 3);
-       if (err)
-               return err;
-
        btrfs_inc_nlink(inode);
 
        err = btrfs_set_inode_index(dir, &index);
        if (err)
                goto fail;
 
-       trans = btrfs_start_transaction(root, 1);
+       /*
+        * 1 item for inode ref
+        * 2 items for dir items
+        */
+       trans = btrfs_start_transaction(root, 3);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto fail;
+       }
 
        btrfs_set_trans_block_group(trans, dir);
        atomic_inc(&inode->i_count);
@@ -4429,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
 fail:
-       btrfs_unreserve_metadata_space(root, 3);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
@@ -4449,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        u64 index = 0;
        unsigned long nr = 1;
 
+       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+       if (err)
+               return err;
+
        /*
         * 2 items for inode and ref
         * 2 items for dir items
         * 1 for xattr if selinux is on
         */
-       err = btrfs_reserve_metadata_space(root, 5);
-       if (err)
-               return err;
-
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               err = -ENOMEM;
-               goto out_unlock;
-       }
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
        btrfs_set_trans_block_group(trans, dir);
 
-       err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-       if (err) {
-               err = -ENOSPC;
-               goto out_fail;
-       }
-
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len,
                                dentry->d_parent->d_inode->i_ino, objectid,
@@ -4510,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 out_fail:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
-
-out_unlock:
-       btrfs_unreserve_metadata_space(root, 5);
        if (drop_on_err)
                iput(inode);
        btrfs_btree_balance_dirty(root, nr);
@@ -4770,6 +5098,7 @@ again:
                        }
                        flush_dcache_page(page);
                } else if (create && PageUptodate(page)) {
+                       WARN_ON(1);
                        if (!trans) {
                                kunmap(page);
                                free_extent_map(em);
@@ -4866,11 +5195,651 @@ out:
        return em;
 }
 
+static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+                                                 u64 start, u64 len)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_trans_handle *trans;
+       struct extent_map *em;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct btrfs_key ins;
+       u64 alloc_hint;
+       int ret;
+
+       btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+
+       trans = btrfs_join_transaction(root, 0);
+       if (!trans)
+               return ERR_PTR(-ENOMEM);
+
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+       alloc_hint = get_extent_allocation_hint(inode, start, len);
+       ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
+                                  alloc_hint, (u64)-1, &ins, 1);
+                       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+               goto out;
+       }
+
+       em = alloc_extent_map(GFP_NOFS);
+       if (!em) {
+               em = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
+       em->start = start;
+       em->orig_start = em->start;
+       em->len = ins.offset;
+
+       em->block_start = ins.objectid;
+       em->block_len = ins.offset;
+       em->bdev = root->fs_info->fs_devices->latest_bdev;
+       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+       while (1) {
+               write_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em);
+               write_unlock(&em_tree->lock);
+               if (ret != -EEXIST)
+                       break;
+               btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
+       }
+
+       ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
+                                          ins.offset, ins.offset, 0);
+       if (ret) {
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+               em = ERR_PTR(ret);
+       }
+out:
+       btrfs_end_transaction(trans, root);
+       return em;
+}
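
In btrfs_new_extent_direct, add_extent_mapping returns -EEXIST for as long as a stale cached extent overlaps the new range, so the loop evicts the cache for that range and retries until the mapping goes in. The idiom in isolation (stubbed insert and evict):

    #include <errno.h>

    static int inserts;

    static int try_insert(void)   { return inserts++ < 2 ? -EEXIST : 0; } /* stub */
    static void evict_range(void) { }   /* stub: drop the overlapping cache entry */

    int main(void)
    {
            while (try_insert() == -EEXIST)
                    evict_range();
            return 0;
    }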
+
+/*
+ * returns 1 when the nocow is safe, < 0 on error, 0 if the
+ * block must be cow'd
+ */
+static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
+                                     struct inode *inode, u64 offset, u64 len)
+{
+       struct btrfs_path *path;
+       int ret;
+       struct extent_buffer *leaf;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_file_extent_item *fi;
+       struct btrfs_key key;
+       u64 disk_bytenr;
+       u64 backref_offset;
+       u64 extent_end;
+       u64 num_bytes;
+       int slot;
+       int found_type;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+                                      offset, 0);
+       if (ret < 0)
+               goto out;
+
+       slot = path->slots[0];
+       if (ret == 1) {
+               if (slot == 0) {
+                       /* can't find the item, must cow */
+                       ret = 0;
+                       goto out;
+               }
+               slot--;
+       }
+       ret = 0;
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       if (key.objectid != inode->i_ino ||
+           key.type != BTRFS_EXTENT_DATA_KEY) {
+               /* not our file or wrong item type, must cow */
+               goto out;
+       }
+
+       if (key.offset > offset) {
+               /* Wrong offset, must cow */
+               goto out;
+       }
+
+       fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+       found_type = btrfs_file_extent_type(leaf, fi);
+       if (found_type != BTRFS_FILE_EXTENT_REG &&
+           found_type != BTRFS_FILE_EXTENT_PREALLOC) {
+               /* not a regular extent, must cow */
+               goto out;
+       }
+       disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+       backref_offset = btrfs_file_extent_offset(leaf, fi);
+
+       extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+       if (extent_end < offset + len) {
+               /* extent doesn't include our full range, must cow */
+               goto out;
+       }
+
+       if (btrfs_extent_readonly(root, disk_bytenr))
+               goto out;
+
+       /*
+        * look for other files referencing this extent, if we
+        * find any we must cow
+        */
+       if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
+                                 key.offset - backref_offset, disk_bytenr))
+               goto out;
+
+       /*
+        * adjust disk_bytenr and num_bytes to cover just the bytes
+        * in this extent we are about to write.  If there
+        * are any csums in that range we have to cow in order
+        * to keep the csums correct
+        */
+       disk_bytenr += backref_offset;
+       disk_bytenr += offset - key.offset;
+       num_bytes = min(offset + len, extent_end) - offset;
+       if (csum_exist_in_range(root, disk_bytenr, num_bytes))
+               goto out;
+       /*
+        * all of the above have passed, it is safe to overwrite this extent
+        * without cow
+        */
+       ret = 1;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+       struct extent_map *em;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 start = iblock << inode->i_blkbits;
+       u64 len = bh_result->b_size;
+       struct btrfs_trans_handle *trans;
+
+       em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
+
+       /*
+        * OK, for INLINE and COMPRESSED extents we need to fall back to buffered
+        * io.  INLINE is special, and we could probably kludge it in here, but
+        * it's still buffered so for safety let's just fall back to the generic
+        * buffered path.
+        *
+        * For COMPRESSED we _have_ to read the entire extent in so we can
+        * decompress it, so there will be buffering required no matter what we
+        * do, so go ahead and fall back to buffered.
+        *
+        * We return -ENOTBLK because that's what makes DIO go ahead and go back
+        * to buffered IO.  Don't blame me, this is the price we pay for using
+        * the generic code.
+        */
+       if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
+           em->block_start == EXTENT_MAP_INLINE) {
+               free_extent_map(em);
+               return -ENOTBLK;
+       }
+
+       /* Just a good old fashioned hole, return */
+       if (!create && (em->block_start == EXTENT_MAP_HOLE ||
+                       test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+               free_extent_map(em);
+               /* DIO will do one hole at a time, so just unlock a sector */
+               unlock_extent(&BTRFS_I(inode)->io_tree, start,
+                             start + root->sectorsize - 1, GFP_NOFS);
+               return 0;
+       }
+
+       /*
+        * We don't allocate a new extent in the following cases
+        *
+        * 1) The inode is marked as NODATACOW.  In this case we'll just use the
+        * existing extent.
+        * 2) The extent is marked as PREALLOC.  We're good to go here and can
+        * just use the extent.
+        */
+       if (!create) {
+               len = em->len - (start - em->start);
+               goto map;
+       }
+
+       if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+           ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
+            em->block_start != EXTENT_MAP_HOLE)) {
+               int type;
+               int ret;
+               u64 block_start;
+
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       type = BTRFS_ORDERED_PREALLOC;
+               else
+                       type = BTRFS_ORDERED_NOCOW;
+               len = min(len, em->len - (start - em->start));
+               block_start = em->block_start + (start - em->start);
+
+               /*
+                * we're not going to log anything, but we do need
+                * to make sure the current transaction stays open
+                * while we look for nocow cross refs
+                */
+               trans = btrfs_join_transaction(root, 0);
+               if (!trans)
+                       goto must_cow;
+
+               if (can_nocow_odirect(trans, inode, start, len) == 1) {
+                       ret = btrfs_add_ordered_extent_dio(inode, start,
+                                          block_start, len, len, type);
+                       btrfs_end_transaction(trans, root);
+                       if (ret) {
+                               free_extent_map(em);
+                               return ret;
+                       }
+                       goto unlock;
+               }
+               btrfs_end_transaction(trans, root);
+       }
+must_cow:
+       /*
+        * this will cow the extent, reset the len in case we changed
+        * it above
+        */
+       len = bh_result->b_size;
+       free_extent_map(em);
+       em = btrfs_new_extent_direct(inode, start, len);
+       if (IS_ERR(em))
+               return PTR_ERR(em);
+       len = min(len, em->len - (start - em->start));
+unlock:
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
+                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
+                         0, NULL, GFP_NOFS);
+map:
+       bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+               inode->i_blkbits;
+       bh_result->b_size = len;
+       bh_result->b_bdev = em->bdev;
+       set_buffer_mapped(bh_result);
+       if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+               set_buffer_new(bh_result);
+
+       free_extent_map(em);
+
+       return 0;
+}
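
The get_blocks interface speaks in filesystem blocks: the callback receives iblock and must fill b_blocknr in the same units, so btrfs_get_blocks_direct converts to bytes with iblock << i_blkbits on entry and shifts back on exit. The round trip in isolation (assuming 4K blocks, i.e. i_blkbits == 12):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            unsigned int blkbits = 12;              /* 4096-byte blocks */
            uint64_t iblock = 3;                    /* block index from the VFS */
            uint64_t start = iblock << blkbits;     /* byte offset: 12288 */

            assert(start == 12288);
            assert((start >> blkbits) == iblock);   /* back to block units */
            return 0;
    }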
+
+struct btrfs_dio_private {
+       struct inode *inode;
+       u64 logical_offset;
+       u64 disk_bytenr;
+       u64 bytes;
+       u32 *csums;
+       void *private;
+};
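
btrfs_dio_private interposes on the bio: the submitter's bi_private is stashed in dip->private, bi_private is pointed at the dip, and both completion handlers restore the saved pointer before calling dio_end_io so the generic DIO code sees its own context again. A miniature of that wrap/unwrap pattern (toy types, user space):

    #include <assert.h>
    #include <stdlib.h>

    struct bio     { void *bi_private; };   /* toy stand-in */
    struct wrapper { void *saved; };        /* plus per-IO state in btrfs */

    static void wrap(struct bio *bio, struct wrapper *w)
    {
            w->saved = bio->bi_private;     /* remember the owner's context */
            bio->bi_private = w;            /* interpose ours */
    }

    static void unwrap(struct bio *bio)
    {
            struct wrapper *w = bio->bi_private;

            bio->bi_private = w->saved;     /* hand the bio back as we found it */
            free(w);
    }

    int main(void)
    {
            struct bio b = { .bi_private = &b };    /* any owner context */
            struct wrapper *w = malloc(sizeof(*w));

            if (!w)
                    return 1;
            wrap(&b, w);
            unwrap(&b);
            assert(b.bi_private == &b);
            return 0;
    }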
+
+static void btrfs_endio_direct_read(struct bio *bio, int err)
+{
+       struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
+       struct bio_vec *bvec = bio->bi_io_vec;
+       struct btrfs_dio_private *dip = bio->bi_private;
+       struct inode *inode = dip->inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 start;
+       u32 *private = dip->csums;
+
+       start = dip->logical_offset;
+       do {
+               if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+                       struct page *page = bvec->bv_page;
+                       char *kaddr;
+                       u32 csum = ~(u32)0;
+                       unsigned long flags;
+
+                       local_irq_save(flags);
+                       kaddr = kmap_atomic(page, KM_IRQ0);
+                       csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
+                                              csum, bvec->bv_len);
+                       btrfs_csum_final(csum, (char *)&csum);
+                       kunmap_atomic(kaddr, KM_IRQ0);
+                       local_irq_restore(flags);
+
+                       flush_dcache_page(bvec->bv_page);
+                       if (csum != *private) {
+                               printk(KERN_ERR "btrfs csum failed ino %lu off"
+                                     " %llu csum %u private %u\n",
+                                     inode->i_ino, (unsigned long long)start,
+                                     csum, *private);
+                               err = -EIO;
+                       }
+               }
+
+               start += bvec->bv_len;
+               private++;
+               bvec++;
+       } while (bvec <= bvec_end);
+
+       unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
+                     dip->logical_offset + dip->bytes - 1, GFP_NOFS);
+       bio->bi_private = dip->private;
+
+       kfree(dip->csums);
+       kfree(dip);
+       dio_end_io(bio, err);
+}
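
The read completion above walks every bio_vec, recomputes the data checksum, and compares it with the csums array recorded at submit time, one u32 per segment, advancing both cursors in lockstep. The same walk in user space (a toy additive checksum standing in for the crc32c-based btrfs_csum_data):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t toy_csum(const char *p, int len)
    {
            uint32_t c = 0;                 /* stand-in for crc32c */

            while (len--)
                    c += (unsigned char)*p++;
            return c;
    }

    int main(void)
    {
            const char *seg[2] = { "abcd", "efgh" };        /* two "bio_vec"s */
            uint32_t expected[2];
            int i, bad = 0;

            for (i = 0; i < 2; i++)         /* recorded at submit time */
                    expected[i] = toy_csum(seg[i], 4);

            for (i = 0; i < 2; i++)         /* verified at completion */
                    if (toy_csum(seg[i], 4) != expected[i]) {
                            fprintf(stderr, "csum failed on segment %d\n", i);
                            bad = 1;
                    }
            return bad;
    }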
+
+static void btrfs_endio_direct_write(struct bio *bio, int err)
+{
+       struct btrfs_dio_private *dip = bio->bi_private;
+       struct inode *inode = dip->inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_ordered_extent *ordered = NULL;
+       struct extent_state *cached_state = NULL;
+       int ret;
+
+       if (err)
+               goto out_done;
+
+       ret = btrfs_dec_test_ordered_pending(inode, &ordered,
+                                            dip->logical_offset, dip->bytes);
+       if (!ret)
+               goto out_done;
+
+       BUG_ON(!ordered);
+
+       trans = btrfs_join_transaction(root, 1);
+       if (!trans) {
+               err = -ENOMEM;
+               goto out;
+       }
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+       if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+               ret = btrfs_ordered_update_i_size(inode, 0, ordered);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, inode);
+               err = ret;
+               goto out;
+       }
+
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
+                        ordered->file_offset + ordered->len - 1, 0,
+                        &cached_state, GFP_NOFS);
+
+       if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
+               ret = btrfs_mark_extent_written(trans, inode,
+                                               ordered->file_offset,
+                                               ordered->file_offset +
+                                               ordered->len);
+               if (ret) {
+                       err = ret;
+                       goto out_unlock;
+               }
+       } else {
+               ret = insert_reserved_file_extent(trans, inode,
+                                                 ordered->file_offset,
+                                                 ordered->start,
+                                                 ordered->disk_len,
+                                                 ordered->len,
+                                                 ordered->len,
+                                                 0, 0, 0,
+                                                 BTRFS_FILE_EXTENT_REG);
+               unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                                  ordered->file_offset, ordered->len);
+               if (ret) {
+                       err = ret;
+                       WARN_ON(1);
+                       goto out_unlock;
+               }
+       }
+
+       add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
+       btrfs_ordered_update_i_size(inode, 0, ordered);
+       btrfs_update_inode(trans, root, inode);
+out_unlock:
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
+                            ordered->file_offset + ordered->len - 1,
+                            &cached_state, GFP_NOFS);
+out:
+       btrfs_delalloc_release_metadata(inode, ordered->len);
+       if (trans)
+               btrfs_end_transaction(trans, root);
+       /* once for us */
+       btrfs_put_ordered_extent(ordered);
+       /* once for the tree */
+       btrfs_put_ordered_extent(ordered);
+out_done:
+       bio->bi_private = dip->private;
+
+       kfree(dip->csums);
+       kfree(dip);
+       dio_end_io(bio, err);
+}
+
+static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
+                                   struct bio *bio, int mirror_num,
+                                   unsigned long bio_flags, u64 offset)
+{
+       int ret;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
+       BUG_ON(ret);
+       return 0;
+}
+
+static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
+                               loff_t file_offset)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_dio_private *dip;
+       struct bio_vec *bvec = bio->bi_io_vec;
+       u64 start;
+       int skip_sum;
+       int write = rw & (1 << BIO_RW);
+       int ret = 0;
+
+       skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+
+       dip = kmalloc(sizeof(*dip), GFP_NOFS);
+       if (!dip) {
+               ret = -ENOMEM;
+               goto free_ordered;
+       }
+       dip->csums = NULL;
+
+       if (!skip_sum) {
+               dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
+               if (!dip->csums) {
+                       ret = -ENOMEM;
+                       goto free_ordered;
+               }
+       }
+
+       dip->private = bio->bi_private;
+       dip->inode = inode;
+       dip->logical_offset = file_offset;
+
+       start = dip->logical_offset;
+       dip->bytes = 0;
+       do {
+               dip->bytes += bvec->bv_len;
+               bvec++;
+       } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+
+       dip->disk_bytenr = (u64)bio->bi_sector << 9;
+       bio->bi_private = dip;
+
+       if (write)
+               bio->bi_end_io = btrfs_endio_direct_write;
+       else
+               bio->bi_end_io = btrfs_endio_direct_read;
+
+       ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+       if (ret)
+               goto out_err;
+
+       if (write && !skip_sum) {
+               ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+                                  inode, rw, bio, 0, 0,
+                                  dip->logical_offset,
+                                  __btrfs_submit_bio_start_direct_io,
+                                  __btrfs_submit_bio_done);
+               if (ret)
+                       goto out_err;
+               return;
+       } else if (!skip_sum)
+               btrfs_lookup_bio_sums_dio(root, inode, bio,
+                                         dip->logical_offset, dip->csums);
+
+       ret = btrfs_map_bio(root, rw, bio, 0, 1);
+       if (ret)
+               goto out_err;
+       return;
+out_err:
+       kfree(dip->csums);
+       kfree(dip);
+free_ordered:
+       /*
+        * If this is a write, we need to clean up the reserved space and kill
+        * the ordered extent.
+        */
+       if (write) {
+               struct btrfs_ordered_extent *ordered;
+               ordered = btrfs_lookup_ordered_extent(inode, file_offset);
+               if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
+                       btrfs_free_reserved_extent(root, ordered->start,
+                                                  ordered->disk_len);
+               /* once for us */
+               btrfs_put_ordered_extent(ordered);
+               /* once for the tree */
+               btrfs_put_ordered_extent(ordered);
+       }
+       bio_endio(bio, ret);
+}
+
+static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
+                       const struct iovec *iov, loff_t offset,
+                       unsigned long nr_segs)
+{
+       int seg;
+       size_t size;
+       unsigned long addr;
+       unsigned blocksize_mask = root->sectorsize - 1;
+       ssize_t retval = -EINVAL;
+       loff_t end = offset;
+
+       if (offset & blocksize_mask)
+               goto out;
+
+       /* Check the memory alignment.  Blocks cannot straddle pages */
+       for (seg = 0; seg < nr_segs; seg++) {
+               addr = (unsigned long)iov[seg].iov_base;
+               size = iov[seg].iov_len;
+               end += size;
+               if ((addr & blocksize_mask) || (size & blocksize_mask))
+                       goto out;
+       }
+       retval = 0;
+out:
+       return retval;
+}
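
check_direct_IO above rejects any request whose file offset, iovec base address, or iovec length is not sector-aligned, using the (x & (sectorsize - 1)) == 0 test that works when the sector size is a power of two. The test in isolation:

    #include <assert.h>
    #include <stdint.h>

    static int aligned(uint64_t x, uint64_t sectorsize)
    {
            return (x & (sectorsize - 1)) == 0;     /* sectorsize must be 2^n */
    }

    int main(void)
    {
            assert(aligned(8192, 4096));
            assert(!aligned(8193, 4096));           /* would straddle a sector */
            return 0;
    }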
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                        const struct iovec *iov, loff_t offset,
                        unsigned long nr_segs)
 {
-       return -EINVAL;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       struct btrfs_ordered_extent *ordered;
+       struct extent_state *cached_state = NULL;
+       u64 lockstart, lockend;
+       ssize_t ret;
+       int writing = rw & WRITE;
+       int write_bits = 0;
+       size_t count = iov_length(iov, nr_segs);
+
+       if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
+                           offset, nr_segs)) {
+               return 0;
+       }
+
+       lockstart = offset;
+       lockend = offset + count - 1;
+
+       if (writing) {
+               ret = btrfs_delalloc_reserve_space(inode, count);
+               if (ret)
+                       goto out;
+       }
+
+       while (1) {
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                0, &cached_state, GFP_NOFS);
+               /*
+                * We're concerned with the entire range that we're going to be
+                * doing DIO to, so we need to make sure there are no ordered
+                * extents in this range.
+                */
+               ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                    lockend - lockstart + 1);
+               if (!ordered)
+                       break;
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                    &cached_state, GFP_NOFS);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_put_ordered_extent(ordered);
+               cond_resched();
+       }
+
+       /*
+        * we don't use btrfs_set_extent_delalloc because we don't want
+        * the dirty or uptodate bits
+        */
+       if (writing) {
+               write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
+               ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                    EXTENT_DELALLOC, 0, NULL, &cached_state,
+                                    GFP_NOFS);
+               if (ret) {
+                       clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                        lockend, EXTENT_LOCKED | write_bits,
+                                        1, 0, &cached_state, GFP_NOFS);
+                       goto out;
+               }
+       }
+
+       free_extent_state(cached_state);
+       cached_state = NULL;
+
+       ret = __blockdev_direct_IO(rw, iocb, inode,
+                  BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+                  iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+                  btrfs_submit_direct, 0);
+
+       if (ret < 0 && ret != -EIOCBQUEUED) {
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
+                             offset + iov_length(iov, nr_segs) - 1,
+                             EXTENT_LOCKED | write_bits, 1, 0,
+                             &cached_state, GFP_NOFS);
+       } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
+               /*
+                * We're falling back to buffered, unlock the section we didn't
+                * do IO on.
+                */
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
+                             offset + iov_length(iov, nr_segs) - 1,
+                             EXTENT_LOCKED | write_bits, 1, 0,
+                             &cached_state, GFP_NOFS);
+       }
+out:
+       free_extent_state(cached_state);
+       return ret;
 }
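
When __blockdev_direct_IO comes back short (0 <= ret < iov_length), btrfs_direct_IO clears the lock and delalloc bits only on the tail it did not write, [offset + ret, offset + len - 1], so buffered I/O can take over that range. Computing the leftover span:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t offset = 4096, len = 16384;    /* requested DIO range */
            int64_t ret = 8192;                     /* short completion */

            uint64_t tail_start = offset + ret;     /* first byte not written */
            uint64_t tail_end = offset + len - 1;   /* inclusive, as in the code */

            assert(tail_start == 12288 && tail_end == 20479);
            return 0;
    }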
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -5034,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 page_start;
        u64 page_end;
 
-       ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
+       ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
        if (ret) {
                if (ret == -ENOMEM)
                        ret = VM_FAULT_OOM;
@@ -5043,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto out;
        }
 
-       ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
-       if (ret) {
-               btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-               ret = VM_FAULT_SIGBUS;
-               goto out;
-       }
-
        ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 again:
        lock_page(page);
@@ -5059,7 +6021,6 @@ again:
 
        if ((page->mapping != inode->i_mapping) ||
            (page_start >= size)) {
-               btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
                /* page got truncated out from underneath us */
                goto out_unlock;
        }
@@ -5100,7 +6061,6 @@ again:
                unlock_extent_cached(io_tree, page_start, page_end,
                                     &cached_state, GFP_NOFS);
                ret = VM_FAULT_SIGBUS;
-               btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
                goto out_unlock;
        }
        ret = 0;
@@ -5127,10 +6087,10 @@ again:
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
 
 out_unlock:
-       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
        if (!ret)
                return VM_FAULT_LOCKED;
        unlock_page(page);
+       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 out:
        return ret;
 }
@@ -5155,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode)
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
+       BUG_ON(IS_ERR(trans));
        btrfs_set_trans_block_group(trans, inode);
+       trans->block_rsv = root->orphan_block_rsv;
 
        /*
         * setattr is responsible for setting the ordered_data_close flag,
@@ -5179,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode)
                btrfs_add_ordered_operation(trans, root, inode);
 
        while (1) {
+               if (!trans) {
+                       trans = btrfs_start_transaction(root, 0);
+                       BUG_ON(IS_ERR(trans));
+                       btrfs_set_trans_block_group(trans, inode);
+                       trans->block_rsv = root->orphan_block_rsv;
+               }
+
+               ret = btrfs_block_rsv_check(trans, root,
+                                           root->orphan_block_rsv, 0, 5);
+               if (ret) {
+                       BUG_ON(ret != -EAGAIN);
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+                       trans = NULL;
+                       continue;
+               }
+
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
@@ -5190,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode)
 
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
+               trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
-
-               trans = btrfs_start_transaction(root, 1);
-               btrfs_set_trans_block_group(trans, inode);
        }
 
        if (ret == 0 && inode->i_nlink > 0) {
@@ -5254,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 struct inode *btrfs_alloc_inode(struct super_block *sb)
 {
        struct btrfs_inode *ei;
+       struct inode *inode;
 
        ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
+
+       ei->root = NULL;
+       ei->space_info = NULL;
+       ei->generation = 0;
+       ei->sequence = 0;
        ei->last_trans = 0;
        ei->last_sub_trans = 0;
        ei->logged_trans = 0;
-       ei->outstanding_extents = 0;
-       ei->reserved_extents = 0;
-       ei->root = NULL;
+       ei->delalloc_bytes = 0;
+       ei->reserved_bytes = 0;
+       ei->disk_i_size = 0;
+       ei->flags = 0;
+       ei->index_cnt = (u64)-1;
+       ei->last_unlink_trans = 0;
+
        spin_lock_init(&ei->accounting_lock);
+       atomic_set(&ei->outstanding_extents, 0);
+       ei->reserved_extents = 0;
+
+       ei->ordered_data_close = 0;
+       ei->orphan_meta_reserved = 0;
+       ei->dummy_inode = 0;
+       ei->force_compress = 0;
+
+       inode = &ei->vfs_inode;
+       extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
+       extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
+       extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
+       mutex_init(&ei->log_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
+       INIT_LIST_HEAD(&ei->delalloc_inodes);
        INIT_LIST_HEAD(&ei->ordered_operations);
-       return &ei->vfs_inode;
+       RB_CLEAR_NODE(&ei->rb_node);
+
+       return inode;
 }
 
 void btrfs_destroy_inode(struct inode *inode)
@@ -5278,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
+       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
+       WARN_ON(BTRFS_I(inode)->reserved_extents);
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -5298,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
-       spin_lock(&root->list_lock);
+       spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
                       inode->i_ino);
                list_del_init(&BTRFS_I(inode)->i_orphan);
        }
-       spin_unlock(&root->list_lock);
+       spin_unlock(&root->orphan_lock);
 
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -5425,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
            new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                return -ENOTEMPTY;
-
-       /*
-        * We want to reserve the absolute worst case amount of items.  So if
-        * both inodes are subvols and we need to unlink them then that would
-        * require 4 item modifications, but if they are both normal inodes it
-        * would require 5 item modifications, so we'll assume their normal
-        * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
-        * should cover the worst case number of items we'll modify.
-        */
-       ret = btrfs_reserve_metadata_space(root, 11);
-       if (ret)
-               return ret;
-
        /*
         * we're using rename to replace one file with another.
         * and the replacement file is large.  Start IO on it now so
@@ -5450,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        /* close the racy window with snapshot create/destroy ioctl */
        if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
                down_read(&root->fs_info->subvol_sem);
+       /*
+        * We want to reserve the absolute worst case amount of items.  So if
+        * both inodes are subvols and we need to unlink them then that would
+        * require 4 item modifications, but if they are both normal inodes it
+        * would require 5 item modifications, so we'll assume their normal
+        * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
+        * should cover the worst case number of items we'll modify.
+        * should cover the worst case number of items we'll modify
+        * (the 20 units reserved below give extra headroom on top of that).
+       trans = btrfs_start_transaction(root, 20);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, new_dir);
 
        if (dest != root)
@@ -5550,7 +6552,6 @@ out_fail:
        if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
                up_read(&root->fs_info->subvol_sem);
 
-       btrfs_unreserve_metadata_space(root, 11);
        return ret;
 }
 
@@ -5602,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
 
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+{
+       struct btrfs_inode *binode;
+       struct inode *inode = NULL;
+
+       spin_lock(&root->fs_info->delalloc_lock);
+       while (!list_empty(&root->fs_info->delalloc_inodes)) {
+               binode = list_entry(root->fs_info->delalloc_inodes.next,
+                                   struct btrfs_inode, delalloc_inodes);
+               inode = igrab(&binode->vfs_inode);
+               if (inode) {
+                       list_move_tail(&binode->delalloc_inodes,
+                                      &root->fs_info->delalloc_inodes);
+                       break;
+               }
+
+               list_del_init(&binode->delalloc_inodes);
+               cond_resched_lock(&root->fs_info->delalloc_lock);
+       }
+       spin_unlock(&root->fs_info->delalloc_lock);
+
+       if (inode) {
+               write_inode_now(inode, 0);
+               if (delay_iput)
+                       btrfs_add_delayed_iput(inode);
+               else
+                       iput(inode);
+               return 1;
+       }
+       return 0;
+}
+
 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                         const char *symname)
 {
@@ -5625,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
                return -ENAMETOOLONG;
 
+       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+       if (err)
+               return err;
+
        /*
         * 2 items for inode item and ref
         * 2 items for dir items
         * 1 item for xattr if selinux is on
         */
-       err = btrfs_reserve_metadata_space(root, 5);
-       if (err)
-               return err;
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans)
-               goto out_fail;
        btrfs_set_trans_block_group(trans, dir);
 
-       err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-       if (err) {
-               err = -ENOSPC;
-               goto out_unlock;
-       }
-
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len,
                                dentry->d_parent->d_inode->i_ino, objectid,
@@ -5716,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
-out_fail:
-       btrfs_unreserve_metadata_space(root, 5);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
@@ -5726,33 +6751,28 @@ out_fail:
        return err;
 }
 
-static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
-                       u64 alloc_hint, int mode, loff_t actual_len)
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+                             u64 start, u64 num_bytes, u64 min_size,
+                             loff_t actual_len, u64 *alloc_hint)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key ins;
        u64 cur_offset = start;
-       u64 num_bytes = end - start;
        int ret = 0;
-       u64 i_size;
 
        while (num_bytes > 0) {
-               trans = btrfs_start_transaction(root, 1);
-
-               ret = btrfs_reserve_extent(trans, root, num_bytes,
-                                          root->sectorsize, 0, alloc_hint,
-                                          (u64)-1, &ins, 1);
-               if (ret) {
-                       WARN_ON(1);
-                       goto stop_trans;
+               trans = btrfs_start_transaction(root, 3);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       break;
                }
 
-               ret = btrfs_reserve_metadata_space(root, 3);
+               ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
+                                          0, *alloc_hint, (u64)-1, &ins, 1);
                if (ret) {
-                       btrfs_free_reserved_extent(root, ins.objectid,
-                                                  ins.offset);
-                       goto stop_trans;
+                       btrfs_end_transaction(trans, root);
+                       break;
                }
 
                ret = insert_reserved_file_extent(trans, inode,
@@ -5766,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
 
                num_bytes -= ins.offset;
                cur_offset += ins.offset;
-               alloc_hint = ins.objectid + ins.offset;
+               *alloc_hint = ins.objectid + ins.offset;
 
                inode->i_ctime = CURRENT_TIME;
                BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-                       (actual_len > inode->i_size) &&
-                       (cur_offset > inode->i_size)) {
-
+                   (actual_len > inode->i_size) &&
+                   (cur_offset > inode->i_size)) {
                        if (cur_offset > actual_len)
-                               i_size  = actual_len;
+                               i_size_write(inode, actual_len);
                        else
-                               i_size = cur_offset;
-                       i_size_write(inode, i_size);
-                       btrfs_ordered_update_i_size(inode, i_size, NULL);
+                               i_size_write(inode, cur_offset);
+                       i_size_write(inode, cur_offset);
+                       btrfs_ordered_update_i_size(inode, cur_offset, NULL);
                }
 
                ret = btrfs_update_inode(trans, root, inode);
                BUG_ON(ret);
 
                btrfs_end_transaction(trans, root);
-               btrfs_unreserve_metadata_space(root, 3);
        }
        return ret;
-
-stop_trans:
-       btrfs_end_transaction(trans, root);
-       return ret;
-
 }
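
btrfs_prealloc_file_range takes whatever extent size the allocator returns on each pass (at least min_size), shrinking num_bytes and advancing cur_offset until the range is covered; in the patch the extent's disk address becomes the next *alloc_hint, while the sketch below uses plain file offsets for brevity (stub allocator, not a kernel API):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t alloc_extent(uint64_t want)     /* stub: may return less */
    {
            return want > 4096 ? 4096 : want;
    }

    int main(void)
    {
            uint64_t cur_offset = 0, num_bytes = 10240, hint = 0;

            while (num_bytes > 0) {
                    uint64_t got = alloc_extent(num_bytes);

                    printf("extent at %llu, len %llu (hint %llu)\n",
                           (unsigned long long)cur_offset,
                           (unsigned long long)got,
                           (unsigned long long)hint);
                    num_bytes -= got;
                    cur_offset += got;
                    hint = cur_offset;              /* try to stay contiguous */
            }
            return 0;
    }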
 
 static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5826,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
                        goto out;
        }
 
-       ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
-                                         alloc_end - alloc_start);
+       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
        if (ret)
                goto out;
 
@@ -5872,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
                if (em->block_start == EXTENT_MAP_HOLE ||
                    (cur_offset >= inode->i_size &&
                     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-                       ret = prealloc_file_range(inode,
-                                                 cur_offset, last_byte,
-                                               alloc_hint, mode, offset+len);
+                       ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+                                                       last_byte - cur_offset,
+                                                       1 << inode->i_blkbits,
+                                                       offset + len,
+                                                       &alloc_hint);
                        if (ret < 0) {
                                free_extent_map(em);
                                break;
                        }
                }
-               if (em->block_start <= EXTENT_MAP_LAST_BYTE)
-                       alloc_hint = em->block_start;
                free_extent_map(em);
 
                cur_offset = last_byte;
@@ -5893,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
                             &cached_state, GFP_NOFS);
 
-       btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
-                                      alloc_end - alloc_start);
+       btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 out:
        mutex_unlock(&inode->i_mutex);
        return ret;
index 97a9783..4cdb98c 100644
@@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root,
        u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
        u64 index = 0;
 
+       ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
+                                      0, &objectid);
+       if (ret)
+               return ret;
+
        /*
         * 1 - inode item
         * 2 - refs
         * 1 - root item
         * 2 - dir items
         */
-       ret = btrfs_reserve_metadata_space(root, 6);
-       if (ret)
-               return ret;
-
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
-
-       ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
-                                      0, &objectid);
-       if (ret)
-               goto fail;
+       trans = btrfs_start_transaction(root, 6);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
                                      0, objectid, NULL, 0, 0, 0);
@@ -345,13 +341,10 @@ fail:
        err = btrfs_commit_transaction(trans, root);
        if (err && !ret)
                ret = err;
-
-       btrfs_unreserve_metadata_space(root, 6);
        return ret;
 }
 
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
-                          char *name, int namelen)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
 {
        struct inode *inode;
        struct btrfs_pending_snapshot *pending_snapshot;
@@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        if (!root->ref_cows)
                return -EINVAL;
 
-       /*
-        * 1 - inode item
-        * 2 - refs
-        * 1 - root item
-        * 2 - dir items
-        */
-       ret = btrfs_reserve_metadata_space(root, 6);
-       if (ret)
-               goto fail;
-
        pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
-       if (!pending_snapshot) {
-               ret = -ENOMEM;
-               btrfs_unreserve_metadata_space(root, 6);
-               goto fail;
-       }
-       pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
-       if (!pending_snapshot->name) {
-               ret = -ENOMEM;
-               kfree(pending_snapshot);
-               btrfs_unreserve_metadata_space(root, 6);
-               goto fail;
-       }
-       memcpy(pending_snapshot->name, name, namelen);
-       pending_snapshot->name[namelen] = '\0';
+       if (!pending_snapshot)
+               return -ENOMEM;
+
+       btrfs_init_block_rsv(&pending_snapshot->block_rsv);
        pending_snapshot->dentry = dentry;
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
        pending_snapshot->root = root;
+
+       trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto fail;
+       }
+
+       ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
+       BUG_ON(ret);
+
        list_add(&pending_snapshot->list,
                 &trans->transaction->pending_snapshots);
-       ret = btrfs_commit_transaction(trans, root);
+       ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
        BUG_ON(ret);
-       btrfs_unreserve_metadata_space(root, 6);
+
+       ret = pending_snapshot->error;
+       if (ret)
+               goto fail;
+
+       btrfs_orphan_cleanup(pending_snapshot->snap);
 
        inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
        if (IS_ERR(inode)) {
@@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        d_instantiate(dentry, inode);
        ret = 0;
 fail:
+       kfree(pending_snapshot);
        return ret;
 }
 
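
create_snapshot() no longer copies the name or reserves space up front; it queues a btrfs_pending_snapshot on the transaction, the snapshot is actually created while the transaction commits, and the outcome comes back through pending_snapshot->error, which the caller reads only after the commit returns. A minimal userspace sketch of that complete-at-commit pattern (illustrative names, not the btrfs API):

#include <stdio.h>
#include <stdlib.h>

struct pending_snapshot {
        int error;              /* filled in by the commit side */
        const char *name;
};

/* stand-in for the per-snapshot work btrfs_commit_transaction() performs */
static void commit_transaction(struct pending_snapshot *p)
{
        p->error = p->name ? 0 : -22;   /* -EINVAL */
}

int main(void)
{
        struct pending_snapshot *p = calloc(1, sizeof(*p));
        int ret;

        if (!p)
                return 1;
        p->name = "snap0";
        commit_transaction(p);  /* creation happens here, not earlier */
        ret = p->error;         /* result is read back after the commit */
        printf("snapshot result: %d\n", ret);
        free(p);                /* kfree(pending_snapshot) on every path */
        return ret ? 1 : 0;
}
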
@@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
                goto out_up_read;
 
        if (snap_src) {
-               error = create_snapshot(snap_src, dentry,
-                                       name, namelen);
+               error = create_snapshot(snap_src, dentry);
        } else {
                error = create_subvol(BTRFS_I(dir)->root, dentry,
                                      name, namelen);
@@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file,
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = 1;
 
-               ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-               if (ret) {
-                       ret = -ENOSPC;
-                       break;
-               }
-
-               ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
-               if (ret) {
-                       btrfs_free_reserved_data_space(root, inode,
-                                                      PAGE_CACHE_SIZE);
-                       ret = -ENOSPC;
-                       break;
-               }
+               ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+               if (ret)
+                       goto err_unlock;
 again:
                if (inode->i_size == 0 ||
                    i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
@@ -622,8 +598,10 @@ again:
                }
 
                page = grab_cache_page(inode->i_mapping, i);
-               if (!page)
+               if (!page) {
+                       ret = -ENOMEM;
                        goto err_reservations;
+               }
 
                if (!PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
@@ -631,6 +609,7 @@ again:
                        if (!PageUptodate(page)) {
                                unlock_page(page);
                                page_cache_release(page);
+                               ret = -EIO;
                                goto err_reservations;
                        }
                }
@@ -644,8 +623,7 @@ again:
                wait_on_page_writeback(page);
 
                if (PageDirty(page)) {
-                       btrfs_free_reserved_data_space(root, inode,
-                                                      PAGE_CACHE_SIZE);
+                       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                        goto loop_unlock;
                }
 
@@ -683,7 +661,6 @@ loop_unlock:
                page_cache_release(page);
                mutex_unlock(&inode->i_mutex);
 
-               btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
                i++;
        }
@@ -713,9 +690,9 @@ loop_unlock:
        return 0;
 
 err_reservations:
+       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+err_unlock:
        mutex_unlock(&inode->i_mutex);
-       btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-       btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
        return ret;
 }
 
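
The defrag loop now takes one combined data-plus-metadata reservation per page via btrfs_delalloc_reserve_space() and unwinds through two labels: err_reservations releases the reservation and falls through to err_unlock, which only drops the mutex, so each failure site jumps to the label matching what it has acquired so far. The ladder in miniature (reserve/release/unlock are stand-ins for the btrfs calls):

#include <stdio.h>

static int  reserve(void) { puts("reserve"); return 0; }
static void release(void) { puts("release"); }
static void unlock(void)  { puts("unlock"); }

static int process_page(int fail_after_reserve)
{
        int ret = reserve();

        if (ret)
                goto err_unlock;        /* nothing to release yet */
        if (fail_after_reserve) {
                ret = -12;              /* e.g. grab_cache_page() -> -ENOMEM */
                goto err_reservations;
        }
        unlock();
        return 0;

err_reservations:
        release();                      /* undo the reservation... */
err_unlock:
        unlock();                       /* ...then drop the lock either way */
        return ret;
}

int main(void)
{
        return process_page(1) == -12 ? 0 : 1;
}
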
@@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
                device->name, (unsigned long long)new_size);
 
        if (new_size > old_size) {
-               trans = btrfs_start_transaction(root, 1);
+               trans = btrfs_start_transaction(root, 0);
                ret = btrfs_grow_device(trans, device, new_size);
                btrfs_commit_transaction(trans, root);
        } else {
@@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        if (err)
                goto out_up_write;
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out;
+       }
+       trans->block_rsv = &root->fs_info->global_block_rsv;
+
        ret = btrfs_unlink_subvol(trans, root, dir,
                                dest->root_key.objectid,
                                dentry->d_name.name,
@@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        dest->root_item.drop_level = 0;
        btrfs_set_root_refs(&dest->root_item, 0);
 
-       ret = btrfs_insert_orphan_item(trans,
-                               root->fs_info->tree_root,
-                               dest->root_key.objectid);
-       BUG_ON(ret);
+       if (!xchg(&dest->orphan_item_inserted, 1)) {
+               ret = btrfs_insert_orphan_item(trans,
+                                       root->fs_info->tree_root,
+                                       dest->root_key.objectid);
+               BUG_ON(ret);
+       }
 
        ret = btrfs_commit_transaction(trans, root);
        BUG_ON(ret);
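
The xchg() above makes the orphan-item insertion a once-only operation: xchg(&dest->orphan_item_inserted, 1) atomically sets the flag and hands back the previous value, so only the first path to reach it inserts the item and any later attempt skips it. The same guard expressed with C11 atomics (atomic_exchange standing in for the kernel's xchg):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int orphan_item_inserted;

static void maybe_insert_orphan(int caller)
{
        if (!atomic_exchange(&orphan_item_inserted, 1))
                printf("caller %d inserts the orphan item\n", caller);
        else
                printf("caller %d skips it, already inserted\n", caller);
}

int main(void)
{
        maybe_insert_orphan(1); /* old value 0: does the insert */
        maybe_insert_orphan(2); /* old value 1: no double insert */
        return 0;
}
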
@@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                        ret = -EPERM;
                        goto out;
                }
-               btrfs_defrag_root(root, 0);
-               btrfs_defrag_root(root->fs_info->extent_root, 0);
+               ret = btrfs_defrag_root(root, 0);
+               if (ret)
+                       goto out;
+               ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
                break;
        case S_IFREG:
                if (!(file->f_mode & FMODE_WRITE)) {
@@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                        /* the rest are all set to zero by kzalloc */
                        range->len = (u64)-1;
                }
-               btrfs_defrag_file(file, range);
+               ret = btrfs_defrag_file(file, range);
                kfree(range);
                break;
+       default:
+               ret = -EINVAL;
        }
 out:
        mnt_drop_write(file->f_path.mnt);
@@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                btrfs_wait_ordered_range(src, off, off+len);
        }
 
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
-
-       /* punch hole in destination first */
-       btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
-
        /* clone data */
        key.objectid = src->i_ino;
        key.type = BTRFS_EXTENT_DATA_KEY;
@@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                 * note the key will change type as we walk through the
                 * tree.
                 */
-               ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;
 
@@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        new_key.objectid = inode->i_ino;
                        new_key.offset = key.offset + destoff - off;
 
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+
                        if (type == BTRFS_FILE_EXTENT_REG ||
                            type == BTRFS_FILE_EXTENT_PREALLOC) {
+                               if (off > key.offset) {
+                                       datao += off - key.offset;
+                                       datal -= off - key.offset;
+                               }
+
+                               if (key.offset + datal > off + len)
+                                       datal = off + len - key.offset;
+
+                               ret = btrfs_drop_extents(trans, inode,
+                                                        new_key.offset,
+                                                        new_key.offset + datal,
+                                                        &hint_byte, 1);
+                               BUG_ON(ret);
+
                                ret = btrfs_insert_empty_item(trans, root, path,
                                                              &new_key, size);
-                               if (ret)
-                                       goto out;
+                               BUG_ON(ret);
 
                                leaf = path->nodes[0];
                                slot = path->slots[0];
@@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                extent = btrfs_item_ptr(leaf, slot,
                                                struct btrfs_file_extent_item);
 
-                               if (off > key.offset) {
-                                       datao += off - key.offset;
-                                       datal -= off - key.offset;
-                               }
-
-                               if (key.offset + datal > off + len)
-                                       datal = off + len - key.offset;
-
                                /* disko == 0 means it's a hole */
                                if (!disko)
                                        datao = 0;
@@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
                                if (comp && (skip || trim)) {
                                        ret = -EINVAL;
+                                       btrfs_end_transaction(trans, root);
                                        goto out;
                                }
                                size -= skip + trim;
                                datal -= skip + trim;
+
+                               ret = btrfs_drop_extents(trans, inode,
+                                                        new_key.offset,
+                                                        new_key.offset + datal,
+                                                        &hint_byte, 1);
+                               BUG_ON(ret);
+
                                ret = btrfs_insert_empty_item(trans, root, path,
                                                              &new_key, size);
-                               if (ret)
-                                       goto out;
+                               BUG_ON(ret);
 
                                if (skip) {
                                        u32 start =
@@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        }
 
                        btrfs_mark_buffer_dirty(leaf);
-               }
+                       btrfs_release_path(root, path);
 
+                       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                       if (new_key.offset + datal > inode->i_size)
+                               btrfs_i_size_write(inode,
+                                                  new_key.offset + datal);
+                       BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
+                       ret = btrfs_update_inode(trans, root, inode);
+                       BUG_ON(ret);
+                       btrfs_end_transaction(trans, root);
+               }
 next:
                btrfs_release_path(root, path);
                key.offset++;
@@ -1717,17 +1727,7 @@ next:
        ret = 0;
 out:
        btrfs_release_path(root, path);
-       if (ret == 0) {
-               inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-               if (destoff + olen > inode->i_size)
-                       btrfs_i_size_write(inode, destoff + olen);
-               BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
-               ret = btrfs_update_inode(trans, root, inode);
-       }
-       btrfs_end_transaction(trans, root);
        unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
-       if (ret)
-               vmtruncate(inode, 0);
 out_unlock:
        mutex_unlock(&src->i_mutex);
        mutex_unlock(&inode->i_mutex);
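
The clone rework moves btrfs_drop_extents() inside the per-extent loop and clamps each source extent to the requested window before dropping and reinserting, adjusting the disk offset (datao) and length (datal) together; the inode size and timestamps are likewise updated per transaction instead of once at the end. The clamping arithmetic, checked standalone with illustrative numbers:

#include <assert.h>

int main(void)
{
        unsigned long long off = 8192, len = 4096;      /* window [8192, 12288) */
        unsigned long long key_offset = 4096;           /* extent starts earlier */
        unsigned long long datao = 0, datal = 8192;     /* extent [4096, 12288) */

        if (off > key_offset) {                 /* clip the head of the extent */
                datao += off - key_offset;
                datal -= off - key_offset;
        }
        if (key_offset + datal > off + len)     /* clip the tail of the extent */
                datal = off + len - key_offset;

        assert(datao == 4096);  /* skip the first 4K of the extent's data */
        assert(datal == 4096);  /* clone only what the window covers */
        return 0;
}
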
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a127c0e..e56c72b 100644 (file)
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
        return 1;
 }
 
+static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
+                         u64 len)
+{
+       if (file_offset + len <= entry->file_offset ||
+           entry->file_offset + entry->len <= file_offset)
+               return 0;
+       return 1;
+}
+
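
range_overlaps() treats both ranges as half-open intervals: [a, a+alen) and [b, b+blen) are disjoint exactly when one ends at or before the other begins. A standalone truth-table check of the same predicate:

#include <assert.h>

typedef unsigned long long u64;

static int range_overlaps(u64 a, u64 alen, u64 b, u64 blen)
{
        if (b + blen <= a || a + alen <= b)
                return 0;
        return 1;
}

int main(void)
{
        assert( range_overlaps(0, 10,  5, 10)); /* partial overlap */
        assert( range_overlaps(0, 100, 20, 1)); /* containment */
        assert(!range_overlaps(0, 10, 10,  5)); /* merely touching: disjoint */
        assert(!range_overlaps(0, 10, 50,  5)); /* far apart */
        return 0;
}
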
 /*
  * look for the first ordered struct that has this offset, otherwise
  * the first one less than this offset
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                            u64 start, u64 len, u64 disk_len, int type)
+static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+                                     u64 start, u64 len, u64 disk_len,
+                                     int type, int dio)
 {
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
 
+       if (dio)
+               set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
+
        /* one ref for the tree */
        atomic_set(&entry->refs, 1);
        init_waitqueue_head(&entry->wait);
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        return 0;
 }
 
+int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+                            u64 start, u64 len, u64 disk_len, int type)
+{
+       return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+                                         disk_len, type, 0);
+}
+
+int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
+                                u64 start, u64 len, u64 disk_len, int type)
+{
+       return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+                                         disk_len, type, 1);
+}
+
 /*
  * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
  * when an ordered extent is finished.  If the list covers more than one
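
btrfs_add_ordered_extent() and the new _dio variant above are deliberately thin: one static worker takes the extra flag, and each exported entry point pins it, so existing callers compile unchanged while the direct-I/O path gets BTRFS_ORDERED_DIRECT set on its entries. The shape of that wrapper pattern, sketched standalone:

#include <stdio.h>

/* the one worker takes the behavioural flag... */
static int __add_extent(unsigned long long off, unsigned long long len, int dio)
{
        printf("add [%llu, %llu) dio=%d\n", off, off + len, dio);
        return 0;
}

/* ...and each public wrapper pins it, leaving old callers untouched */
static int add_extent(unsigned long long off, unsigned long long len)
{
        return __add_extent(off, len, 0);
}

static int add_extent_dio(unsigned long long off, unsigned long long len)
{
        return __add_extent(off, len, 1);
}

int main(void)
{
        add_extent(0, 4096);
        add_extent_dio(4096, 4096);
        return 0;
}
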
@@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
        tree->last = NULL;
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       WARN_ON(!BTRFS_I(inode)->outstanding_extents);
-       BTRFS_I(inode)->outstanding_extents--;
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
-       btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
-                                             inode, 1);
-
        spin_lock(&root->fs_info->ordered_extent_lock);
        list_del_init(&entry->root_extent_list);
 
@@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
         * start IO on any dirty ones so the wait doesn't stall waiting
         * for pdflush to find them
         */
-       filemap_fdatawrite_range(inode->i_mapping, start, end);
+       if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
+               filemap_fdatawrite_range(inode->i_mapping, start, end);
        if (wait) {
                wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
                                                 &entry->flags));
@@ -588,6 +609,47 @@ out:
        return entry;
 }
 
+/* Since the DIO code tries to lock a wide area, we need to look for any ordered
+ * extents that exist in the range, rather than just the start of the range.
+ */
+struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
+                                                       u64 file_offset,
+                                                       u64 len)
+{
+       struct btrfs_ordered_inode_tree *tree;
+       struct rb_node *node;
+       struct btrfs_ordered_extent *entry = NULL;
+
+       tree = &BTRFS_I(inode)->ordered_tree;
+       spin_lock(&tree->lock);
+       node = tree_search(tree, file_offset);
+       if (!node) {
+               node = tree_search(tree, file_offset + len);
+               if (!node)
+                       goto out;
+       }
+
+       while (1) {
+               entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+               if (range_overlaps(entry, file_offset, len))
+                       break;
+
+               if (entry->file_offset >= file_offset + len) {
+                       entry = NULL;
+                       break;
+               }
+               entry = NULL;
+               node = rb_next(node);
+               if (!node)
+                       break;
+       }
+out:
+       if (entry)
+               atomic_inc(&entry->refs);
+       spin_unlock(&tree->lock);
+       return entry;
+}
+
 /*
  * lookup and return any extent before 'file_offset'.  NULL is returned
  * if none is found
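
btrfs_lookup_ordered_range() seeds the rbtree walk with tree_search(file_offset), falls back to tree_search(file_offset + len) when nothing is found at the start, then steps forward with rb_next() until an entry overlaps or provably starts past the end of the range. A linearized sketch of that walk over a sorted array (the rbtree, the tree lock, and the refcount taken on a hit are all elided):

#include <stdio.h>

struct entry { unsigned long long off, len; };

static int overlaps(const struct entry *e,
                    unsigned long long off, unsigned long long len)
{
        return !(off + len <= e->off || e->off + e->len <= off);
}

static const struct entry *lookup_range(const struct entry *v, int n,
                                        unsigned long long off,
                                        unsigned long long len)
{
        for (int i = 0; i < n; i++) {   /* rb_next() walk, flattened */
                if (overlaps(&v[i], off, len))
                        return &v[i];
                if (v[i].off >= off + len)
                        break;          /* past the range: nothing can match */
        }
        return NULL;
}

int main(void)
{
        struct entry v[] = { {0, 4096}, {8192, 4096}, {32768, 4096} };
        const struct entry *e = lookup_range(v, 3, 6144, 4096);

        printf("%s\n", e ? "overlapping ordered extent found" : "none");
        return 0;
}
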
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index c82f76a..8ac3654 100644 (file)
@@ -72,6 +72,8 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
 
+#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
+
 struct btrfs_ordered_extent {
        /* logical offset in the file */
        u64 file_offset;
@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
                                   struct btrfs_ordered_extent **cached,
                                   u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                            u64 start, u64 len, u64 disk_len, int tyep);
+                            u64 start, u64 len, u64 disk_len, int type);
+int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
+                                u64 start, u64 len, u64 disk_len, int type);
 int btrfs_add_ordered_sum(struct inode *inode,
                          struct btrfs_ordered_extent *entry,
                          struct btrfs_ordered_sum *sum);
@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
 struct btrfs_ordered_extent *
 btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
+struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
+                                                       u64 file_offset,
+                                                       u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                                struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index e558dd9..05d41e5 100644 (file)
@@ -44,8 +44,12 @@ struct tree_entry {
 struct backref_node {
        struct rb_node rb_node;
        u64 bytenr;
-       /* objectid tree block owner */
+
+       u64 new_bytenr;
+       /* objectid of tree block owner, may not be up to date */
        u64 owner;
+       /* link to pending, changed or detached list */
+       struct list_head list;
        /* list of upper level blocks reference this block */
        struct list_head upper;
        /* list of child blocks in the cache */
@@ -56,9 +60,9 @@ struct backref_node {
        struct extent_buffer *eb;
        /* level of tree block */
        unsigned int level:8;
-       /* 1 if the block is root of old snapshot */
-       unsigned int old_root:1;
-       /* 1 if no child blocks in the cache */
+       /* is the block in a non-reference-counted tree */
+       unsigned int cowonly:1;
+       /* 1 if no child node in the cache */
        unsigned int lowest:1;
        /* is the extent buffer locked */
        unsigned int locked:1;
@@ -66,6 +70,16 @@ struct backref_node {
        unsigned int processed:1;
        /* have backrefs of this block been checked */
        unsigned int checked:1;
+       /*
+        * 1 if corresponding block has been cowed but some upper
+        * level block pointers may not point to the new location
+        */
+       unsigned int pending:1;
+       /*
+        * 1 if the backref node isn't connected to any other
+        * backref node.
+        */
+       unsigned int detached:1;
 };
 
 /*
@@ -74,7 +88,6 @@ struct backref_node {
 struct backref_edge {
        struct list_head list[2];
        struct backref_node *node[2];
-       u64 blockptr;
 };
 
 #define LOWER  0
@@ -83,9 +96,25 @@ struct backref_edge {
 struct backref_cache {
        /* red black tree of all backref nodes in the cache */
        struct rb_root rb_root;
-       /* list of backref nodes with no child block in the cache */
+       /* for passing backref nodes to btrfs_reloc_cow_block */
+       struct backref_node *path[BTRFS_MAX_LEVEL];
+       /*
+        * list of blocks that have been cowed but some block
+        * pointers in upper level blocks may not reflect the
+        * new location
+        */
        struct list_head pending[BTRFS_MAX_LEVEL];
-       spinlock_t lock;
+       /* list of backref nodes with no child node */
+       struct list_head leaves;
+       /* list of blocks that have been cowed in current transaction */
+       struct list_head changed;
+       /* list of detached backref nodes */
+       struct list_head detached;
+
+       u64 last_trans;
+
+       int nr_nodes;
+       int nr_edges;
 };
 
 /*
@@ -113,15 +142,6 @@ struct tree_block {
        unsigned int key_ready:1;
 };
 
-/* inode vector */
-#define INODEVEC_SIZE 16
-
-struct inodevec {
-       struct list_head list;
-       struct inode *inode[INODEVEC_SIZE];
-       int nr;
-};
-
 #define MAX_EXTENTS 128
 
 struct file_extent_cluster {
@@ -138,36 +158,43 @@ struct reloc_control {
        struct btrfs_root *extent_root;
        /* inode for moving data */
        struct inode *data_inode;
-       struct btrfs_workers workers;
+
+       struct btrfs_block_rsv *block_rsv;
+
+       struct backref_cache backref_cache;
+
+       struct file_extent_cluster cluster;
        /* tree blocks have been processed */
        struct extent_io_tree processed_blocks;
        /* map start of tree root to corresponding reloc tree */
        struct mapping_tree reloc_root_tree;
        /* list of reloc trees */
        struct list_head reloc_roots;
+       /* size of metadata reservation for merging reloc trees */
+       u64 merging_rsv_size;
+       /* size of relocated tree nodes */
+       u64 nodes_relocated;
+
        u64 search_start;
        u64 extents_found;
-       u64 extents_skipped;
-       int stage;
-       int create_reloc_root;
+
+       int block_rsv_retries;
+
+       unsigned int stage:8;
+       unsigned int create_reloc_tree:1;
+       unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
-       unsigned int found_old_snapshot:1;
+       unsigned int commit_transaction:1;
 };
 
 /* stages of data relocation */
 #define MOVE_DATA_EXTENTS      0
 #define UPDATE_DATA_PTRS       1
 
-/*
- * merge reloc tree to corresponding fs tree in worker threads
- */
-struct async_merge {
-       struct btrfs_work work;
-       struct reloc_control *rc;
-       struct btrfs_root *root;
-       struct completion *done;
-       atomic_t *num_pending;
-};
+static void remove_backref_node(struct backref_cache *cache,
+                               struct backref_node *node);
+static void __mark_block_processed(struct reloc_control *rc,
+                                  struct backref_node *node);
 
 static void mapping_tree_init(struct mapping_tree *tree)
 {
@@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache)
        cache->rb_root = RB_ROOT;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
                INIT_LIST_HEAD(&cache->pending[i]);
-       spin_lock_init(&cache->lock);
+       INIT_LIST_HEAD(&cache->changed);
+       INIT_LIST_HEAD(&cache->detached);
+       INIT_LIST_HEAD(&cache->leaves);
+}
+
+static void backref_cache_cleanup(struct backref_cache *cache)
+{
+       struct backref_node *node;
+       int i;
+
+       while (!list_empty(&cache->detached)) {
+               node = list_entry(cache->detached.next,
+                                 struct backref_node, list);
+               remove_backref_node(cache, node);
+       }
+
+       while (!list_empty(&cache->leaves)) {
+               node = list_entry(cache->leaves.next,
+                                 struct backref_node, lower);
+               remove_backref_node(cache, node);
+       }
+
+       cache->last_trans = 0;
+
+       for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+               BUG_ON(!list_empty(&cache->pending[i]));
+       BUG_ON(!list_empty(&cache->changed));
+       BUG_ON(!list_empty(&cache->detached));
+       BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
+       BUG_ON(cache->nr_nodes);
+       BUG_ON(cache->nr_edges);
+}
+
+static struct backref_node *alloc_backref_node(struct backref_cache *cache)
+{
+       struct backref_node *node;
+
+       node = kzalloc(sizeof(*node), GFP_NOFS);
+       if (node) {
+               INIT_LIST_HEAD(&node->list);
+               INIT_LIST_HEAD(&node->upper);
+               INIT_LIST_HEAD(&node->lower);
+               RB_CLEAR_NODE(&node->rb_node);
+               cache->nr_nodes++;
+       }
+       return node;
+}
+
+static void free_backref_node(struct backref_cache *cache,
+                             struct backref_node *node)
+{
+       if (node) {
+               cache->nr_nodes--;
+               kfree(node);
+       }
+}
+
+static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
+{
+       struct backref_edge *edge;
+
+       edge = kzalloc(sizeof(*edge), GFP_NOFS);
+       if (edge)
+               cache->nr_edges++;
+       return edge;
 }
 
-static void backref_node_init(struct backref_node *node)
+static void free_backref_edge(struct backref_cache *cache,
+                             struct backref_edge *edge)
 {
-       memset(node, 0, sizeof(*node));
-       INIT_LIST_HEAD(&node->upper);
-       INIT_LIST_HEAD(&node->lower);
-       RB_CLEAR_NODE(&node->rb_node);
+       if (edge) {
+               cache->nr_edges--;
+               kfree(edge);
+       }
 }
 
 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
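
alloc_backref_node()/free_backref_node() and their edge counterparts above funnel every allocation through the cache so that nr_nodes and nr_edges always count outstanding objects, which lets backref_cache_cleanup() BUG() if anything leaked. The pattern in miniature:

#include <assert.h>
#include <stdlib.h>

struct cache { int nr_nodes; };

static void *alloc_node(struct cache *c)
{
        void *n = calloc(1, 64);        /* kzalloc(..., GFP_NOFS) stand-in */

        if (n)
                c->nr_nodes++;
        return n;
}

static void free_node(struct cache *c, void *n)
{
        if (n) {
                c->nr_nodes--;
                free(n);
        }
}

int main(void)
{
        struct cache c = { 0 };
        void *a = alloc_node(&c);
        void *b = alloc_node(&c);

        free_node(&c, a);
        free_node(&c, b);
        assert(c.nr_nodes == 0);        /* mirrors BUG_ON(cache->nr_nodes) */
        return 0;
}
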
@@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node,
                edges[idx++] = edge;
                node = edge->node[UPPER];
        }
+       BUG_ON(node->detached);
        *index = idx;
        return node;
 }
@@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[],
        return NULL;
 }
 
+static void unlock_node_buffer(struct backref_node *node)
+{
+       if (node->locked) {
+               btrfs_tree_unlock(node->eb);
+               node->locked = 0;
+       }
+}
+
 static void drop_node_buffer(struct backref_node *node)
 {
        if (node->eb) {
-               if (node->locked) {
-                       btrfs_tree_unlock(node->eb);
-                       node->locked = 0;
-               }
+               unlock_node_buffer(node);
                free_extent_buffer(node->eb);
                node->eb = NULL;
        }
@@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node)
 static void drop_backref_node(struct backref_cache *tree,
                              struct backref_node *node)
 {
-       BUG_ON(!node->lowest);
        BUG_ON(!list_empty(&node->upper));
 
        drop_node_buffer(node);
+       list_del(&node->list);
        list_del(&node->lower);
-
-       rb_erase(&node->rb_node, &tree->rb_root);
-       kfree(node);
+       if (!RB_EMPTY_NODE(&node->rb_node))
+               rb_erase(&node->rb_node, &tree->rb_root);
+       free_backref_node(tree, node);
 }
 
 /*
@@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache,
        if (!node)
                return;
 
-       BUG_ON(!node->lowest);
+       BUG_ON(!node->lowest && !node->detached);
        while (!list_empty(&node->upper)) {
                edge = list_entry(node->upper.next, struct backref_edge,
                                  list[LOWER]);
                upper = edge->node[UPPER];
                list_del(&edge->list[LOWER]);
                list_del(&edge->list[UPPER]);
-               kfree(edge);
+               free_backref_edge(cache, edge);
+
+               if (RB_EMPTY_NODE(&upper->rb_node)) {
+                       BUG_ON(!list_empty(&node->upper));
+                       drop_backref_node(cache, node);
+                       node = upper;
+                       node->lowest = 1;
+                       continue;
+               }
                /*
-                * add the node to pending list if no other
+                * add the node to the leaf node list if no other
                 * child block cached.
                 */
                if (list_empty(&upper->lower)) {
-                       list_add_tail(&upper->lower,
-                                     &cache->pending[upper->level]);
+                       list_add_tail(&upper->lower, &cache->leaves);
                        upper->lowest = 1;
                }
        }
+
        drop_backref_node(cache, node);
 }
 
+static void update_backref_node(struct backref_cache *cache,
+                               struct backref_node *node, u64 bytenr)
+{
+       struct rb_node *rb_node;
+       rb_erase(&node->rb_node, &cache->rb_root);
+       node->bytenr = bytenr;
+       rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
+       BUG_ON(rb_node);
+}
+
+/*
+ * update backref cache after a transaction commit
+ */
+static int update_backref_cache(struct btrfs_trans_handle *trans,
+                               struct backref_cache *cache)
+{
+       struct backref_node *node;
+       int level = 0;
+
+       if (cache->last_trans == 0) {
+               cache->last_trans = trans->transid;
+               return 0;
+       }
+
+       if (cache->last_trans == trans->transid)
+               return 0;
+
+       /*
+        * detached nodes are used to avoid unnecessary backref
+        * lookup. a transaction commit changes the extent tree,
+        * so the detached nodes are no longer useful.
+        */
+       while (!list_empty(&cache->detached)) {
+               node = list_entry(cache->detached.next,
+                                 struct backref_node, list);
+               remove_backref_node(cache, node);
+       }
+
+       while (!list_empty(&cache->changed)) {
+               node = list_entry(cache->changed.next,
+                                 struct backref_node, list);
+               list_del_init(&node->list);
+               BUG_ON(node->pending);
+               update_backref_node(cache, node, node->new_bytenr);
+       }
+
+       /*
+        * some nodes can be left in the pending list if there were
+        * errors while processing the pending nodes.
+        */
+       for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+               list_for_each_entry(node, &cache->pending[level], list) {
+                       BUG_ON(!node->pending);
+                       if (node->bytenr == node->new_bytenr)
+                               continue;
+                       update_backref_node(cache, node, node->new_bytenr);
+               }
+       }
+
+       cache->last_trans = 0;
+       return 1;
+}
+
+static int should_ignore_root(struct btrfs_root *root)
+{
+       struct btrfs_root *reloc_root;
+
+       if (!root->ref_cows)
+               return 0;
+
+       reloc_root = root->reloc_root;
+       if (!reloc_root)
+               return 0;
+
+       if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
+           root->fs_info->running_transaction->transid - 1)
+               return 0;
+       /*
+        * if there is a reloc tree and it was created in a previous
+        * transaction, backref lookup can find the reloc tree,
+        * so the backref node for the fs tree root is useless for
+        * relocation.
+        */
+       return 1;
+}
+
 /*
  * find reloc tree by address of tree root
  */
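
update_backref_cache() above runs once per new transaction: detached nodes are thrown away because the extent tree they summarized has changed, and every node whose block was COWed gets re-indexed under new_bytenr, since the cache's rbtree is keyed by on-disk block address. A toy version of the re-key step that update_backref_node() performs:

#include <assert.h>

struct node { unsigned long long bytenr, new_bytenr; };

/* rb_erase() plus tree_insert() collapse to a field update in this toy */
static void update_backref_node(struct node *n)
{
        n->bytenr = n->new_bytenr;
}

int main(void)
{
        struct node n = { .bytenr = 4096, .new_bytenr = 8192 };

        update_backref_node(&n);        /* lookups by the new location now hit */
        assert(n.bytenr == 8192);
        return 0;
}
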
@@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
  * for all upper level blocks that directly/indirectly reference the
  * block are also cached.
  */
-static struct backref_node *build_backref_tree(struct reloc_control *rc,
-                                              struct backref_cache *cache,
-                                              struct btrfs_key *node_key,
-                                              int level, u64 bytenr)
+static noinline_for_stack
+struct backref_node *build_backref_tree(struct reloc_control *rc,
+                                       struct btrfs_key *node_key,
+                                       int level, u64 bytenr)
 {
+       struct backref_cache *cache = &rc->backref_cache;
        struct btrfs_path *path1;
        struct btrfs_path *path2;
        struct extent_buffer *eb;
@@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc,
        unsigned long end;
        unsigned long ptr;
        LIST_HEAD(list);
+       LIST_HEAD(useless);
+       int cowonly;
        int ret;
        int err = 0;
 
@@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc,
                goto out;
        }
 
-       node = kmalloc(sizeof(*node), GFP_NOFS);
+       node = alloc_backref_node(cache);
        if (!node) {
                err = -ENOMEM;
                goto out;
        }
 
-       backref_node_init(node);
        node->bytenr = bytenr;
-       node->owner = 0;
        node->level = level;
        node->lowest = 1;
        cur = node;
@@ -587,17 +780,20 @@ again:
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
                    key.type == BTRFS_EXTENT_REF_V0_KEY) {
-                       if (key.objectid == key.offset &&
-                           key.type == BTRFS_EXTENT_REF_V0_KEY) {
+                       if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
                                struct btrfs_extent_ref_v0 *ref0;
                                ref0 = btrfs_item_ptr(eb, path1->slots[0],
                                                struct btrfs_extent_ref_v0);
                                root = find_tree_root(rc, eb, ref0);
-                               if (root)
-                                       cur->root = root;
-                               else
-                                       cur->old_root = 1;
-                               break;
+                               if (!root->ref_cows)
+                                       cur->cowonly = 1;
+                               if (key.objectid == key.offset) {
+                                       if (root && !should_ignore_root(root))
+                                               cur->root = root;
+                                       else
+                                               list_add(&cur->list, &useless);
+                                       break;
+                               }
                        }
 #else
                BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
@@ -614,22 +810,20 @@ again:
                                break;
                        }
 
-                       edge = kzalloc(sizeof(*edge), GFP_NOFS);
+                       edge = alloc_backref_edge(cache);
                        if (!edge) {
                                err = -ENOMEM;
                                goto out;
                        }
                        rb_node = tree_search(&cache->rb_root, key.offset);
                        if (!rb_node) {
-                               upper = kmalloc(sizeof(*upper), GFP_NOFS);
+                               upper = alloc_backref_node(cache);
                                if (!upper) {
-                                       kfree(edge);
+                                       free_backref_edge(cache, edge);
                                        err = -ENOMEM;
                                        goto out;
                                }
-                               backref_node_init(upper);
                                upper->bytenr = key.offset;
-                               upper->owner = 0;
                                upper->level = cur->level + 1;
                                /*
                                 *  backrefs for the upper level block isn't
@@ -639,11 +833,12 @@ again:
                        } else {
                                upper = rb_entry(rb_node, struct backref_node,
                                                 rb_node);
+                               BUG_ON(!upper->checked);
                                INIT_LIST_HEAD(&edge->list[UPPER]);
                        }
-                       list_add(&edge->list[LOWER], &cur->upper);
-                       edge->node[UPPER] = upper;
+                       list_add_tail(&edge->list[LOWER], &cur->upper);
                        edge->node[LOWER] = cur;
+                       edge->node[UPPER] = upper;
 
                        goto next;
                } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
@@ -657,11 +852,17 @@ again:
                        goto out;
                }
 
+               if (!root->ref_cows)
+                       cur->cowonly = 1;
+
                if (btrfs_root_level(&root->root_item) == cur->level) {
                        /* tree root */
                        BUG_ON(btrfs_root_bytenr(&root->root_item) !=
                               cur->bytenr);
-                       cur->root = root;
+                       if (should_ignore_root(root))
+                               list_add(&cur->list, &useless);
+                       else
+                               cur->root = root;
                        break;
                }
 
@@ -692,11 +893,14 @@ again:
                        if (!path2->nodes[level]) {
                                BUG_ON(btrfs_root_bytenr(&root->root_item) !=
                                       lower->bytenr);
-                               lower->root = root;
+                               if (should_ignore_root(root))
+                                       list_add(&lower->list, &useless);
+                               else
+                                       lower->root = root;
                                break;
                        }
 
-                       edge = kzalloc(sizeof(*edge), GFP_NOFS);
+                       edge = alloc_backref_edge(cache);
                        if (!edge) {
                                err = -ENOMEM;
                                goto out;
@@ -705,16 +909,17 @@ again:
                        eb = path2->nodes[level];
                        rb_node = tree_search(&cache->rb_root, eb->start);
                        if (!rb_node) {
-                               upper = kmalloc(sizeof(*upper), GFP_NOFS);
+                               upper = alloc_backref_node(cache);
                                if (!upper) {
-                                       kfree(edge);
+                                       free_backref_edge(cache, edge);
                                        err = -ENOMEM;
                                        goto out;
                                }
-                               backref_node_init(upper);
                                upper->bytenr = eb->start;
                                upper->owner = btrfs_header_owner(eb);
                                upper->level = lower->level + 1;
+                               if (!root->ref_cows)
+                                       upper->cowonly = 1;
 
                                /*
                                 * if we know the block isn't shared
@@ -744,10 +949,12 @@ again:
                                                 rb_node);
                                BUG_ON(!upper->checked);
                                INIT_LIST_HEAD(&edge->list[UPPER]);
+                               if (!upper->owner)
+                                       upper->owner = btrfs_header_owner(eb);
                        }
                        list_add_tail(&edge->list[LOWER], &lower->upper);
-                       edge->node[UPPER] = upper;
                        edge->node[LOWER] = lower;
+                       edge->node[UPPER] = upper;
 
                        if (rb_node)
                                break;
@@ -785,8 +992,13 @@ next:
         * into the cache.
         */
        BUG_ON(!node->checked);
-       rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
-       BUG_ON(rb_node);
+       cowonly = node->cowonly;
+       if (!cowonly) {
+               rb_node = tree_insert(&cache->rb_root, node->bytenr,
+                                     &node->rb_node);
+               BUG_ON(rb_node);
+               list_add_tail(&node->lower, &cache->leaves);
+       }
 
        list_for_each_entry(edge, &node->upper, list[LOWER])
                list_add_tail(&edge->list[UPPER], &list);
@@ -795,6 +1007,14 @@ next:
                edge = list_entry(list.next, struct backref_edge, list[UPPER]);
                list_del_init(&edge->list[UPPER]);
                upper = edge->node[UPPER];
+               if (upper->detached) {
+                       list_del(&edge->list[LOWER]);
+                       lower = edge->node[LOWER];
+                       free_backref_edge(cache, edge);
+                       if (list_empty(&lower->upper))
+                               list_add(&lower->list, &useless);
+                       continue;
+               }
 
                if (!RB_EMPTY_NODE(&upper->rb_node)) {
                        if (upper->lowest) {
@@ -807,25 +1027,69 @@ next:
                }
 
                BUG_ON(!upper->checked);
-               rb_node = tree_insert(&cache->rb_root, upper->bytenr,
-                                     &upper->rb_node);
-               BUG_ON(rb_node);
+               BUG_ON(cowonly != upper->cowonly);
+               if (!cowonly) {
+                       rb_node = tree_insert(&cache->rb_root, upper->bytenr,
+                                             &upper->rb_node);
+                       BUG_ON(rb_node);
+               }
 
                list_add_tail(&edge->list[UPPER], &upper->lower);
 
                list_for_each_entry(edge, &upper->upper, list[LOWER])
                        list_add_tail(&edge->list[UPPER], &list);
        }
+       /*
+        * process useless backref nodes. backref nodes for tree leaves
+        * are deleted from the cache. backref nodes for upper level
+        * tree blocks are left in the cache to avoid unnecessary backref
+        * lookup.
+        */
+       while (!list_empty(&useless)) {
+               upper = list_entry(useless.next, struct backref_node, list);
+               list_del_init(&upper->list);
+               BUG_ON(!list_empty(&upper->upper));
+               if (upper == node)
+                       node = NULL;
+               if (upper->lowest) {
+                       list_del_init(&upper->lower);
+                       upper->lowest = 0;
+               }
+               while (!list_empty(&upper->lower)) {
+                       edge = list_entry(upper->lower.next,
+                                         struct backref_edge, list[UPPER]);
+                       list_del(&edge->list[UPPER]);
+                       list_del(&edge->list[LOWER]);
+                       lower = edge->node[LOWER];
+                       free_backref_edge(cache, edge);
+
+                       if (list_empty(&lower->upper))
+                               list_add(&lower->list, &useless);
+               }
+               __mark_block_processed(rc, upper);
+               if (upper->level > 0) {
+                       list_add(&upper->list, &cache->detached);
+                       upper->detached = 1;
+               } else {
+                       rb_erase(&upper->rb_node, &cache->rb_root);
+                       free_backref_node(cache, upper);
+               }
+       }
 out:
        btrfs_free_path(path1);
        btrfs_free_path(path2);
        if (err) {
-               INIT_LIST_HEAD(&list);
+               while (!list_empty(&useless)) {
+                       lower = list_entry(useless.next,
+                                          struct backref_node, upper);
+                       list_del_init(&lower->upper);
+               }
                upper = node;
+               INIT_LIST_HEAD(&list);
                while (upper) {
                        if (RB_EMPTY_NODE(&upper->rb_node)) {
                                list_splice_tail(&upper->upper, &list);
-                               kfree(upper);
+                               free_backref_node(cache, upper);
                        }
 
                        if (list_empty(&list))
@@ -833,15 +1097,104 @@ out:
 
                        edge = list_entry(list.next, struct backref_edge,
                                          list[LOWER]);
+                       list_del(&edge->list[LOWER]);
                        upper = edge->node[UPPER];
-                       kfree(edge);
+                       free_backref_edge(cache, edge);
                }
                return ERR_PTR(err);
        }
+       BUG_ON(node && node->detached);
        return node;
 }
 
 /*
+ * helper to add a backref node for the newly created snapshot.
+ * the backref node is created by cloning the backref node that
+ * corresponds to the root of the source tree
+ */
+static int clone_backref_node(struct btrfs_trans_handle *trans,
+                             struct reloc_control *rc,
+                             struct btrfs_root *src,
+                             struct btrfs_root *dest)
+{
+       struct btrfs_root *reloc_root = src->reloc_root;
+       struct backref_cache *cache = &rc->backref_cache;
+       struct backref_node *node = NULL;
+       struct backref_node *new_node;
+       struct backref_edge *edge;
+       struct backref_edge *new_edge;
+       struct rb_node *rb_node;
+
+       if (cache->last_trans > 0)
+               update_backref_cache(trans, cache);
+
+       rb_node = tree_search(&cache->rb_root, src->commit_root->start);
+       if (rb_node) {
+               node = rb_entry(rb_node, struct backref_node, rb_node);
+               if (node->detached)
+                       node = NULL;
+               else
+                       BUG_ON(node->new_bytenr != reloc_root->node->start);
+       }
+
+       if (!node) {
+               rb_node = tree_search(&cache->rb_root,
+                                     reloc_root->commit_root->start);
+               if (rb_node) {
+                       node = rb_entry(rb_node, struct backref_node,
+                                       rb_node);
+                       BUG_ON(node->detached);
+               }
+       }
+
+       if (!node)
+               return 0;
+
+       new_node = alloc_backref_node(cache);
+       if (!new_node)
+               return -ENOMEM;
+
+       new_node->bytenr = dest->node->start;
+       new_node->level = node->level;
+       new_node->lowest = node->lowest;
+       new_node->root = dest;
+
+       if (!node->lowest) {
+               list_for_each_entry(edge, &node->lower, list[UPPER]) {
+                       new_edge = alloc_backref_edge(cache);
+                       if (!new_edge)
+                               goto fail;
+
+                       new_edge->node[UPPER] = new_node;
+                       new_edge->node[LOWER] = edge->node[LOWER];
+                       list_add_tail(&new_edge->list[UPPER],
+                                     &new_node->lower);
+               }
+       }
+
+       rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
+                             &new_node->rb_node);
+       BUG_ON(rb_node);
+
+       if (!new_node->lowest) {
+               list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
+                       list_add_tail(&new_edge->list[LOWER],
+                                     &new_edge->node[LOWER]->upper);
+               }
+       }
+       return 0;
+fail:
+       while (!list_empty(&new_node->lower)) {
+               new_edge = list_entry(new_node->lower.next,
+                                     struct backref_edge, list[UPPER]);
+               list_del(&new_edge->list[UPPER]);
+               free_backref_edge(cache, new_edge);
+       }
+       free_backref_node(cache, new_node);
+       return -ENOMEM;
+}
+
+/*
  * helper to add 'address of tree root -> reloc tree' mapping
  */
 static int __add_reloc_root(struct btrfs_root *root)
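
clone_backref_node() above builds the copy in two phases: new edges are first linked only onto the new node's lower list, and only after every allocation has succeeded are they published onto the shared lower nodes; on failure the private list is unwound and the cache is left untouched. The construct-then-publish shape, standalone:

#include <stdlib.h>

struct edge { struct edge *next; };
struct node { struct edge *lower; };

static int clone_edges(struct node *dst, int n)
{
        for (int i = 0; i < n; i++) {
                struct edge *e = calloc(1, sizeof(*e));

                if (!e)
                        goto fail;
                e->next = dst->lower;   /* linked onto the new node only */
                dst->lower = e;
        }
        /* the publish step (linking into the shared lists) would go here */
        return 0;

fail:
        while (dst->lower) {            /* unwind: nothing was published yet */
                struct edge *e = dst->lower;

                dst->lower = e->next;
                free(e);
        }
        return -12;                     /* -ENOMEM */
}

int main(void)
{
        struct node n = { 0 };
        int ret = clone_edges(&n, 4);

        while (n.lower) {               /* tidy up the toy on success */
                struct edge *e = n.lower;

                n.lower = e->next;
                free(e);
        }
        return ret ? 1 : 0;
}
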
@@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
        return 0;
 }
 
-/*
- * create reloc tree for a given fs tree. reloc tree is just a
- * snapshot of the fs tree with special root objectid.
- */
-int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root)
+static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
+                                       struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_root *reloc_root;
        struct extent_buffer *eb;
@@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
        struct btrfs_key root_key;
        int ret;
 
-       if (root->reloc_root) {
-               reloc_root = root->reloc_root;
-               reloc_root->last_trans = trans->transid;
-               return 0;
-       }
-
-       if (!root->fs_info->reloc_ctl ||
-           !root->fs_info->reloc_ctl->create_reloc_root ||
-           root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
-               return 0;
-
        root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
        BUG_ON(!root_item);
 
        root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        root_key.type = BTRFS_ROOT_ITEM_KEY;
-       root_key.offset = root->root_key.objectid;
+       root_key.offset = objectid;
 
-       ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
-                             BTRFS_TREE_RELOC_OBJECTID);
-       BUG_ON(ret);
+       if (root->root_key.objectid == objectid) {
+               /* called by btrfs_init_reloc_root */
+               ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
+                                     BTRFS_TREE_RELOC_OBJECTID);
+               BUG_ON(ret);
+
+               btrfs_set_root_last_snapshot(&root->root_item,
+                                            trans->transid - 1);
+       } else {
+               /*
+                * called by btrfs_reloc_post_snapshot_hook.
+                * the source tree is a reloc tree; all tree blocks
+                * modified after it was created have the RELOC flag
+                * set in their headers, so it's OK not to update
+                * the 'last_snapshot'.
+                */
+               ret = btrfs_copy_root(trans, root, root->node, &eb,
+                                     BTRFS_TREE_RELOC_OBJECTID);
+               BUG_ON(ret);
+       }
 
-       btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
        memcpy(root_item, &root->root_item, sizeof(*root_item));
-       btrfs_set_root_refs(root_item, 1);
        btrfs_set_root_bytenr(root_item, eb->start);
        btrfs_set_root_level(root_item, btrfs_header_level(eb));
        btrfs_set_root_generation(root_item, trans->transid);
-       memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
-       root_item->drop_level = 0;
+
+       if (root->root_key.objectid == objectid) {
+               btrfs_set_root_refs(root_item, 0);
+               memset(&root_item->drop_progress, 0,
+                      sizeof(struct btrfs_disk_key));
+               root_item->drop_level = 0;
+       }
 
        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);
@@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
                                                 &root_key);
        BUG_ON(IS_ERR(reloc_root));
        reloc_root->last_trans = trans->transid;
+       return reloc_root;
+}
+
+/*
+ * create reloc tree for a given fs tree. reloc tree is just a
+ * snapshot of the fs tree with special root objectid.
+ */
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root)
+{
+       struct btrfs_root *reloc_root;
+       struct reloc_control *rc = root->fs_info->reloc_ctl;
+       int clear_rsv = 0;
+
+       if (root->reloc_root) {
+               reloc_root = root->reloc_root;
+               reloc_root->last_trans = trans->transid;
+               return 0;
+       }
+
+       if (!rc || !rc->create_reloc_tree ||
+           root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+               return 0;
+
+       if (!trans->block_rsv) {
+               trans->block_rsv = rc->block_rsv;
+               clear_rsv = 1;
+       }
+       reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
+       if (clear_rsv)
+               trans->block_rsv = NULL;
 
        __add_reloc_root(reloc_root);
        root->reloc_root = reloc_root;
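
btrfs_init_reloc_root() above borrows the relocation control's block reservation for just the duration of create_reloc_root() when the transaction does not already carry one, then restores trans->block_rsv so the caller's accounting is unchanged. The borrow-and-restore shape:

#include <assert.h>
#include <stddef.h>

struct trans { void *block_rsv; };

static void create_reloc_root(struct trans *t)
{
        assert(t->block_rsv);   /* the root copy needs a reservation to draw on */
}

static void init_reloc_root(struct trans *t, void *rc_rsv)
{
        int clear_rsv = 0;

        if (!t->block_rsv) {    /* borrow the relocation reservation */
                t->block_rsv = rc_rsv;
                clear_rsv = 1;
        }
        create_reloc_root(t);
        if (clear_rsv)          /* leave the handle as we found it */
                t->block_rsv = NULL;
}

int main(void)
{
        struct trans t = { NULL };
        int rsv;

        init_reloc_root(&t, &rsv);
        assert(t.block_rsv == NULL);
        return 0;
}
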
@@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;
 
-       if (btrfs_root_refs(root_item) == 0) {
+       if (root->fs_info->reloc_ctl->merge_reloc_tree &&
+           btrfs_root_refs(root_item) == 0) {
                root->reloc_root = NULL;
                del = 1;
        }
@@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
                goto out;
        }
 
-       if (new_bytenr)
-               *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+       *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        ret = 0;
 out:
        btrfs_free_path(path);
@@ -1114,19 +1503,18 @@ out:
  * update file extent items in the tree leaf to point to
  * the new locations.
  */
-static int replace_file_extents(struct btrfs_trans_handle *trans,
-                               struct reloc_control *rc,
-                               struct btrfs_root *root,
-                               struct extent_buffer *leaf,
-                               struct list_head *inode_list)
+static noinline_for_stack
+int replace_file_extents(struct btrfs_trans_handle *trans,
+                        struct reloc_control *rc,
+                        struct btrfs_root *root,
+                        struct extent_buffer *leaf)
 {
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        struct inode *inode = NULL;
-       struct inodevec *ivec = NULL;
        u64 parent;
        u64 bytenr;
-       u64 new_bytenr;
+       u64 new_bytenr = 0;
        u64 num_bytes;
        u64 end;
        u32 nritems;
@@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans,
                 * to complete and drop the extent cache
                 */
                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
-                       if (!ivec || ivec->nr == INODEVEC_SIZE) {
-                               ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
-                               BUG_ON(!ivec);
-                               ivec->nr = 0;
-                               list_add_tail(&ivec->list, inode_list);
-                       }
                        if (first) {
                                inode = find_next_inode(root, key.objectid);
-                               if (inode)
-                                       ivec->inode[ivec->nr++] = inode;
                                first = 0;
                        } else if (inode && inode->i_ino < key.objectid) {
+                               btrfs_add_delayed_iput(inode);
                                inode = find_next_inode(root, key.objectid);
-                               if (inode)
-                                       ivec->inode[ivec->nr++] = inode;
                        }
                        if (inode && inode->i_ino == key.objectid) {
                                end = key.offset +
@@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans,
 
                ret = get_new_location(rc->data_inode, &new_bytenr,
                                       bytenr, num_bytes);
-               if (ret > 0)
+               if (ret > 0) {
+                       WARN_ON(1);
                        continue;
+               }
                BUG_ON(ret < 0);
 
                btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
@@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans,
        }
        if (dirty)
                btrfs_mark_buffer_dirty(leaf);
+       if (inode)
+               btrfs_add_delayed_iput(inode);
        return 0;
 }
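
replace_file_extents() no longer batches inode pointers in inodevec arrays; each stale inode reference is queued with btrfs_add_delayed_iput() so the final iput happens outside the transaction (the old put_inodes() helper is deleted further down). A sketch of the deferred-release idea in plain C, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

struct deferred {
        int ino;
        struct deferred *next;
};

static struct deferred *pending;

static void add_delayed_put(int ino)
{
        struct deferred *d = malloc(sizeof(*d));
        if (!d)
                abort();
        d->ino = ino;
        d->next = pending;
        pending = d;
}

static void drain_delayed_puts(void)   /* runs outside the critical section */
{
        while (pending) {
                struct deferred *d = pending;
                pending = d->next;
                printf("iput(%d)\n", d->ino);
                free(d);
        }
}

int main(void)
{
        add_delayed_put(257);          /* queued while "transaction" is open */
        add_delayed_put(258);
        drain_delayed_puts();          /* dropped after it ends */
        return 0;
}
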
 
@@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot,
  * if no block got replaced, 0 is returned. if there are other
  * errors, a negative error number is returned.
  */
-static int replace_path(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *dest, struct btrfs_root *src,
-                       struct btrfs_path *path, struct btrfs_key *next_key,
-                       struct extent_buffer **leaf,
-                       int lowest_level, int max_level)
+static noinline_for_stack
+int replace_path(struct btrfs_trans_handle *trans,
+                struct btrfs_root *dest, struct btrfs_root *src,
+                struct btrfs_path *path, struct btrfs_key *next_key,
+                int lowest_level, int max_level)
 {
        struct extent_buffer *eb;
        struct extent_buffer *parent;
@@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans,
        u64 new_ptr_gen;
        u64 last_snapshot;
        u32 blocksize;
+       int cow = 0;
        int level;
        int ret;
        int slot;
 
        BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
        BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
-       BUG_ON(lowest_level > 1 && leaf);
 
        last_snapshot = btrfs_root_last_snapshot(&src->root_item);
-
+again:
        slot = path->slots[lowest_level];
        btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
 
@@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans,
                return 0;
        }
 
-       ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
-       BUG_ON(ret);
+       if (cow) {
+               ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
+               BUG_ON(ret);
+       }
        btrfs_set_lock_blocking(eb);
 
        if (next_key) {
@@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans,
 
                if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
                    memcmp_node_keys(parent, slot, path, level)) {
-                       if (level <= lowest_level && !leaf) {
+                       if (level <= lowest_level) {
                                ret = 0;
                                break;
                        }
@@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans,
                        eb = read_tree_block(dest, old_bytenr, blocksize,
                                             old_ptr_gen);
                        btrfs_tree_lock(eb);
-                       ret = btrfs_cow_block(trans, dest, eb, parent,
-                                             slot, &eb);
-                       BUG_ON(ret);
-                       btrfs_set_lock_blocking(eb);
-
-                       if (level <= lowest_level) {
-                               *leaf = eb;
-                               ret = 0;
-                               break;
+                       if (cow) {
+                               ret = btrfs_cow_block(trans, dest, eb, parent,
+                                                     slot, &eb);
+                               BUG_ON(ret);
                        }
+                       btrfs_set_lock_blocking(eb);
 
                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);
@@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans,
                        continue;
                }
 
+               if (!cow) {
+                       btrfs_tree_unlock(parent);
+                       free_extent_buffer(parent);
+                       cow = 1;
+                       goto again;
+               }
+
                btrfs_node_key_to_cpu(path->nodes[level], &key,
                                      path->slots[level]);
                btrfs_release_path(src, path);
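
replace_path() now makes a read-only pass first (cow == 0) and only restarts with COW enabled once it finds a block that actually needs replacing, so untouched subtrees are never COWed. A generic two-pass model of that restart, assuming a flat array walk in place of the tree search:

#include <stdio.h>

static int probe_needs_work(int item)
{
        return item % 2;               /* stand-in for the key comparison */
}

static void process(int item, int cow)
{
        if (cow)
                printf("COW and replace %d\n", item);
}

static void walk(const int *items, int n)
{
        int cow = 0;
        int i;
again:
        for (i = 0; i < n; i++) {
                if (!probe_needs_work(items[i]))
                        continue;      /* cheap skip, nothing COWed */
                if (!cow) {
                        cow = 1;       /* restart, this time for real */
                        goto again;
                }
                process(items[i], cow);
        }
}

int main(void)
{
        int items[] = { 2, 4, 5, 8 };
        walk(items, 4);
        return 0;
}
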
@@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root,
        return 0;
 }
 
-static void put_inodes(struct list_head *list)
-{
-       struct inodevec *ivec;
-       while (!list_empty(list)) {
-               ivec = list_entry(list->next, struct inodevec, list);
-               list_del(&ivec->list);
-               while (ivec->nr > 0) {
-                       ivec->nr--;
-                       iput(ivec->inode[ivec->nr]);
-               }
-               kfree(ivec);
-       }
-}
-
 static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key)
 
@@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
        struct btrfs_root *reloc_root;
        struct btrfs_root_item *root_item;
        struct btrfs_path *path;
-       struct extent_buffer *leaf = NULL;
+       struct extent_buffer *leaf;
        unsigned long nr;
        int level;
        int max_level;
        int replaced = 0;
        int ret;
        int err = 0;
+       u32 min_reserved;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                btrfs_unlock_up_safe(path, 0);
        }
 
-       if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
-               trans = btrfs_start_transaction(root, 1);
+       min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
+       memset(&next_key, 0, sizeof(next_key));
 
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &key, 0);
-               btrfs_release_path(reloc_root, path);
+       while (1) {
+               trans = btrfs_start_transaction(root, 0);
+               trans->block_rsv = rc->block_rsv;
 
-               ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
-               if (ret < 0) {
-                       err = ret;
-                       goto out;
+               ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
+                                           min_reserved, 0);
+               if (ret) {
+                       BUG_ON(ret != -EAGAIN);
+                       ret = btrfs_commit_transaction(trans, root);
+                       BUG_ON(ret);
+                       continue;
                }
 
-               leaf = path->nodes[0];
-               btrfs_unlock_up_safe(path, 1);
-               ret = replace_file_extents(trans, rc, root, leaf,
-                                          &inode_list);
-               if (ret < 0)
-                       err = ret;
-               goto out;
-       }
-
-       memset(&next_key, 0, sizeof(next_key));
-
-       while (1) {
-               leaf = NULL;
                replaced = 0;
-               trans = btrfs_start_transaction(root, 1);
                max_level = level;
 
                ret = walk_down_reloc_tree(reloc_root, path, &level);
@@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                if (!find_next_key(path, level, &key) &&
                    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
                        ret = 0;
-               } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
-                       ret = replace_path(trans, root, reloc_root,
-                                          path, &next_key, &leaf,
-                                          level, max_level);
                } else {
-                       ret = replace_path(trans, root, reloc_root,
-                                          path, &next_key, NULL,
-                                          level, max_level);
+                       ret = replace_path(trans, root, reloc_root, path,
+                                          &next_key, level, max_level);
                }
                if (ret < 0) {
                        err = ret;
@@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                        btrfs_node_key_to_cpu(path->nodes[level], &key,
                                              path->slots[level]);
                        replaced = 1;
-               } else if (leaf) {
-                       /*
-                        * no block got replaced, try replacing file extents
-                        */
-                       btrfs_item_key_to_cpu(leaf, &key, 0);
-                       ret = replace_file_extents(trans, rc, root, leaf,
-                                                  &inode_list);
-                       btrfs_tree_unlock(leaf);
-                       free_extent_buffer(leaf);
-                       BUG_ON(ret < 0);
                }
 
                ret = walk_up_reloc_tree(reloc_root, path, &level);
@@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                root_item->drop_level = level;
 
                nr = trans->blocks_used;
-               btrfs_end_transaction(trans, root);
+               btrfs_end_transaction_throttle(trans, root);
 
                btrfs_btree_balance_dirty(root, nr);
 
-               /*
-                * put inodes outside transaction, otherwise we may deadlock.
-                */
-               put_inodes(&inode_list);
-
                if (replaced && rc->stage == UPDATE_DATA_PTRS)
                        invalidate_extent_cache(root, &key, &next_key);
        }
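
merge_reloc_root() now checks for a minimum reservation of nodesize * (BTRFS_MAX_LEVEL - 1) * 2 on every pass and, on -EAGAIN, commits the transaction to reclaim pinned space before retrying. A compact model of that check/commit/retry loop; the numbers and helpers here are invented for illustration:

#include <stdio.h>
#include <errno.h>

static long reserve = 3;
static const long min_reserved = 4;

static int rsv_check(void)
{
        return reserve >= min_reserved ? 0 : -EAGAIN;
}

static void commit_transaction(void)
{
        reserve += 4;                  /* committing frees pinned space */
        printf("committed, reserve now %ld\n", reserve);
}

int main(void)
{
        int passes = 2;

        while (passes) {
                if (rsv_check()) {     /* -EAGAIN when short on space */
                        commit_transaction();
                        continue;      /* retry with a fresh reservation */
                }
                printf("do one merge pass\n");
                reserve -= 1;
                passes--;
        }
        return 0;
}
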
@@ -1765,87 +2109,125 @@ out:
                       sizeof(root_item->drop_progress));
                root_item->drop_level = 0;
                btrfs_set_root_refs(root_item, 0);
+               btrfs_update_reloc_root(trans, root);
        }
 
        nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction_throttle(trans, root);
 
        btrfs_btree_balance_dirty(root, nr);
 
-       put_inodes(&inode_list);
-
        if (replaced && rc->stage == UPDATE_DATA_PTRS)
                invalidate_extent_cache(root, &key, &next_key);
 
        return err;
 }
 
-/*
- * callback for the work threads.
- * this function merges reloc tree with corresponding fs tree,
- * and then drops the reloc tree.
- */
-static void merge_func(struct btrfs_work *work)
+static noinline_for_stack
+int prepare_to_merge(struct reloc_control *rc, int err)
 {
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root;
+       struct btrfs_root *root = rc->extent_root;
        struct btrfs_root *reloc_root;
-       struct async_merge *async;
+       struct btrfs_trans_handle *trans;
+       LIST_HEAD(reloc_roots);
+       u64 num_bytes = 0;
+       int ret;
+       int retries = 0;
+
+       mutex_lock(&root->fs_info->trans_mutex);
+       rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
+       rc->merging_rsv_size += rc->nodes_relocated * 2;
+       mutex_unlock(&root->fs_info->trans_mutex);
+again:
+       if (!err) {
+               num_bytes = rc->merging_rsv_size;
+               ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
+                                         num_bytes, &retries);
+               if (ret)
+                       err = ret;
+       }
 
-       async = container_of(work, struct async_merge, work);
-       reloc_root = async->root;
+       trans = btrfs_join_transaction(rc->extent_root, 1);
+
+       if (!err) {
+               if (num_bytes != rc->merging_rsv_size) {
+                       btrfs_end_transaction(trans, rc->extent_root);
+                       btrfs_block_rsv_release(rc->extent_root,
+                                               rc->block_rsv, num_bytes);
+                       retries = 0;
+                       goto again;
+               }
+       }
+
+       rc->merge_reloc_tree = 1;
+
+       while (!list_empty(&rc->reloc_roots)) {
+               reloc_root = list_entry(rc->reloc_roots.next,
+                                       struct btrfs_root, root_list);
+               list_del_init(&reloc_root->root_list);
 
-       if (btrfs_root_refs(&reloc_root->root_item) > 0) {
                root = read_fs_root(reloc_root->fs_info,
                                    reloc_root->root_key.offset);
                BUG_ON(IS_ERR(root));
                BUG_ON(root->reloc_root != reloc_root);
 
-               merge_reloc_root(async->rc, root);
-
-               trans = btrfs_start_transaction(root, 1);
+               /*
+                * set reference count to 1, so btrfs_recover_relocation
+                * knows it should resume merging
+                */
+               if (!err)
+                       btrfs_set_root_refs(&reloc_root->root_item, 1);
                btrfs_update_reloc_root(trans, root);
-               btrfs_end_transaction(trans, root);
-       }
 
-       btrfs_drop_snapshot(reloc_root, 0);
+               list_add(&reloc_root->root_list, &reloc_roots);
+       }
 
-       if (atomic_dec_and_test(async->num_pending))
-               complete(async->done);
+       list_splice(&reloc_roots, &rc->reloc_roots);
 
-       kfree(async);
+       if (!err)
+               btrfs_commit_transaction(trans, rc->extent_root);
+       else
+               btrfs_end_transaction(trans, rc->extent_root);
+       return err;
 }
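
prepare_to_merge() samples the merge reservation size under trans_mutex, reserves it outside the lock, and re-checks after joining the transaction; if merging_rsv_size grew in between, it releases and starts over. A userspace rendering of that optimistic reserve-and-recheck pattern (names hypothetical):

#include <stdio.h>

static long needed_size = 10;          /* may grow concurrently */

static long sample_needed(void)
{
        return needed_size;
}

static void reserve(long n)  { printf("reserve %ld\n", n); }
static void release(long n)  { printf("release %ld\n", n); }

int main(void)
{
        long num_bytes;
again:
        num_bytes = sample_needed();
        reserve(num_bytes);
        if (needed_size == 10)
                needed_size = 12;      /* simulate one concurrent growth */
        if (num_bytes != sample_needed()) {
                release(num_bytes);    /* stale size, undo and retry */
                goto again;
        }
        printf("proceed with %ld reserved\n", num_bytes);
        return 0;
}
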
 
-static int merge_reloc_roots(struct reloc_control *rc)
+static noinline_for_stack
+int merge_reloc_roots(struct reloc_control *rc)
 {
-       struct async_merge *async;
        struct btrfs_root *root;
-       struct completion done;
-       atomic_t num_pending;
+       struct btrfs_root *reloc_root;
+       LIST_HEAD(reloc_roots);
+       int found = 0;
+       int ret;
+again:
+       root = rc->extent_root;
+       mutex_lock(&root->fs_info->trans_mutex);
+       list_splice_init(&rc->reloc_roots, &reloc_roots);
+       mutex_unlock(&root->fs_info->trans_mutex);
 
-       init_completion(&done);
-       atomic_set(&num_pending, 1);
+       while (!list_empty(&reloc_roots)) {
+               found = 1;
+               reloc_root = list_entry(reloc_roots.next,
+                                       struct btrfs_root, root_list);
 
-       while (!list_empty(&rc->reloc_roots)) {
-               root = list_entry(rc->reloc_roots.next,
-                                 struct btrfs_root, root_list);
-               list_del_init(&root->root_list);
+               if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+                       root = read_fs_root(reloc_root->fs_info,
+                                           reloc_root->root_key.offset);
+                       BUG_ON(IS_ERR(root));
+                       BUG_ON(root->reloc_root != reloc_root);
 
-               async = kmalloc(sizeof(*async), GFP_NOFS);
-               BUG_ON(!async);
-               async->work.func = merge_func;
-               async->work.flags = 0;
-               async->rc = rc;
-               async->root = root;
-               async->done = &done;
-               async->num_pending = &num_pending;
-               atomic_inc(&num_pending);
-               btrfs_queue_worker(&rc->workers, &async->work);
+                       ret = merge_reloc_root(rc, root);
+                       BUG_ON(ret);
+               } else {
+                       list_del_init(&reloc_root->root_list);
+               }
+               btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
        }
 
-       if (!atomic_dec_and_test(&num_pending))
-               wait_for_completion(&done);
-
+       if (found) {
+               found = 0;
+               goto again;
+       }
        BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
        return 0;
 }
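
merge_reloc_roots() replaces the old merge_func() worker threads with a synchronous drain: splice rc->reloc_roots onto a private list under trans_mutex, merge each root unlocked, and loop again while anything was found. Modeled generically below, with a plain array standing in for the kernel list:

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int shared[8] = { 1, 2, 3 };
static int shared_nr = 3;

int main(void)
{
        int priv[8];
        int nr, i, found;
again:
        found = 0;
        pthread_mutex_lock(&lock);
        nr = shared_nr;                /* splice: steal the whole list */
        for (i = 0; i < nr; i++)
                priv[i] = shared[i];
        shared_nr = 0;
        pthread_mutex_unlock(&lock);

        for (i = 0; i < nr; i++) {
                found = 1;
                printf("merge root %d\n", priv[i]);
        }
        if (found)
                goto again;            /* new roots may have been queued */
        return 0;
}
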
@@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
        return btrfs_record_root_in_trans(trans, root);
 }
 
-/*
- * select one tree from trees that references the block.
- * for blocks in refernce counted trees, we preper reloc tree.
- * if no reloc tree found and reloc_only is true, NULL is returned.
- */
-static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
-                                           struct backref_node *node,
-                                           struct backref_edge *edges[],
-                                           int *nr, int reloc_only)
+static noinline_for_stack
+struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
+                                    struct reloc_control *rc,
+                                    struct backref_node *node,
+                                    struct backref_edge *edges[], int *nr)
 {
        struct backref_node *next;
        struct btrfs_root *root;
-       int index;
-       int loop = 0;
-again:
-       index = 0;
+       int index = 0;
+
        next = node;
        while (1) {
                cond_resched();
                next = walk_up_backref(next, edges, &index);
                root = next->root;
-               if (!root) {
-                       BUG_ON(!node->old_root);
-                       goto skip;
-               }
-
-               /* no other choice for non-refernce counted tree */
-               if (!root->ref_cows) {
-                       BUG_ON(reloc_only);
-                       break;
-               }
+               BUG_ON(!root);
+               BUG_ON(!root->ref_cows);
 
                if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
                        record_reloc_root_in_trans(trans, root);
                        break;
                }
 
-               if (loop) {
-                       btrfs_record_root_in_trans(trans, root);
+               btrfs_record_root_in_trans(trans, root);
+               root = root->reloc_root;
+
+               if (next->new_bytenr != root->node->start) {
+                       BUG_ON(next->new_bytenr);
+                       BUG_ON(!list_empty(&next->list));
+                       next->new_bytenr = root->node->start;
+                       next->root = root;
+                       list_add_tail(&next->list,
+                                     &rc->backref_cache.changed);
+                       __mark_block_processed(rc, next);
                        break;
                }
 
-               if (reloc_only || next != node) {
-                       if (!root->reloc_root)
-                               btrfs_record_root_in_trans(trans, root);
-                       root = root->reloc_root;
-                       /*
-                        * if the reloc tree was created in current
-                        * transation, there is no node in backref tree
-                        * corresponds to the root of the reloc tree.
-                        */
-                       if (btrfs_root_last_snapshot(&root->root_item) ==
-                           trans->transid - 1)
-                               break;
-               }
-skip:
+               WARN_ON(1);
                root = NULL;
                next = walk_down_backref(edges, &index);
                if (!next || next->level <= node->level)
                        break;
        }
+       if (!root)
+               return NULL;
 
-       if (!root && !loop && !reloc_only) {
-               loop = 1;
-               goto again;
+       *nr = index;
+       next = node;
+       /* setup backref node path for btrfs_reloc_cow_block */
+       while (1) {
+               rc->backref_cache.path[next->level] = next;
+               if (--index < 0)
+                       break;
+               next = edges[index]->node[UPPER];
        }
-
-       if (root)
-               *nr = index;
-       else
-               *nr = 0;
-
        return root;
 }
 
+/*
+ * select a tree root for relocation. return NULL if the block
+ * is reference counted; we should use do_relocation() in that
+ * case. return a tree root pointer if the block isn't reference
+ * counted. return -ENOENT if the block is the root of a reloc tree.
+ */
 static noinline_for_stack
 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
                                   struct backref_node *node)
 {
+       struct backref_node *next;
+       struct btrfs_root *root;
+       struct btrfs_root *fs_root = NULL;
        struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
-       int nr;
-       return __select_one_root(trans, node, edges, &nr, 0);
+       int index = 0;
+
+       next = node;
+       while (1) {
+               cond_resched();
+               next = walk_up_backref(next, edges, &index);
+               root = next->root;
+               BUG_ON(!root);
+
+               /* no other choice for non-reference counted tree */
+               if (!root->ref_cows)
+                       return root;
+
+               if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
+                       fs_root = root;
+
+               if (next != node)
+                       return NULL;
+
+               next = walk_down_backref(edges, &index);
+               if (!next || next->level <= node->level)
+                       break;
+       }
+
+       if (!fs_root)
+               return ERR_PTR(-ENOENT);
+       return fs_root;
 }
 
 static noinline_for_stack
-struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
-                                    struct backref_node *node,
-                                    struct backref_edge *edges[], int *nr)
+u64 calcu_metadata_size(struct reloc_control *rc,
+                       struct backref_node *node, int reserve)
 {
-       return __select_one_root(trans, node, edges, nr, 1);
+       struct backref_node *next = node;
+       struct backref_edge *edge;
+       struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
+       u64 num_bytes = 0;
+       int index = 0;
+
+       BUG_ON(reserve && node->processed);
+
+       while (next) {
+               cond_resched();
+               while (1) {
+                       if (next->processed && (reserve || next != node))
+                               break;
+
+                       num_bytes += btrfs_level_size(rc->extent_root,
+                                                     next->level);
+
+                       if (list_empty(&next->upper))
+                               break;
+
+                       edge = list_entry(next->upper.next,
+                                         struct backref_edge, list[LOWER]);
+                       edges[index++] = edge;
+                       next = edge->node[UPPER];
+               }
+               next = walk_down_backref(edges, &index);
+       }
+       return num_bytes;
 }
 
-static void grab_path_buffers(struct btrfs_path *path,
-                             struct backref_node *node,
-                             struct backref_edge *edges[], int nr)
+static int reserve_metadata_space(struct btrfs_trans_handle *trans,
+                                 struct reloc_control *rc,
+                                 struct backref_node *node)
 {
-       int i = 0;
-       while (1) {
-               drop_node_buffer(node);
-               node->eb = path->nodes[node->level];
-               BUG_ON(!node->eb);
-               if (path->locks[node->level])
-                       node->locked = 1;
-               path->nodes[node->level] = NULL;
-               path->locks[node->level] = 0;
-
-               if (i >= nr)
-                       break;
+       struct btrfs_root *root = rc->extent_root;
+       u64 num_bytes;
+       int ret;
 
-               edges[i]->blockptr = node->eb->start;
-               node = edges[i]->node[UPPER];
-               i++;
+       num_bytes = calcu_metadata_size(rc, node, 1) * 2;
+
+       trans->block_rsv = rc->block_rsv;
+       ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
+                                 &rc->block_rsv_retries);
+       if (ret) {
+               if (ret == -EAGAIN)
+                       rc->commit_transaction = 1;
+               return ret;
        }
+
+       rc->block_rsv_retries = 0;
+       return 0;
+}
+
+static void release_metadata_space(struct reloc_control *rc,
+                                  struct backref_node *node)
+{
+       u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
+       btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
 }
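
calcu_metadata_size() sums the block size of every not-yet-processed node along the upward backref paths, and the callers reserve twice that amount since both the fs tree and reloc tree sides may be COWed. The sketch below simplifies the backref DAG to a single parent pointer per node; treat it as a model of the accounting, not the real traversal:

#include <stdio.h>

struct node {
        int level;
        int processed;
        struct node *parent;
};

static long level_size(int level)
{
        (void)level;                   /* nodesize is uniform in this sketch */
        return 4096;
}

static long metadata_size(struct node *n)
{
        long bytes = 0;
        for (; n; n = n->parent) {
                if (n->processed)
                        break;         /* already relocated, nothing to COW */
                bytes += level_size(n->level);
        }
        return bytes;
}

int main(void)
{
        struct node root = { 2, 0, NULL };
        struct node mid  = { 1, 0, &root };
        struct node leaf = { 0, 0, &mid };

        printf("reserve %ld bytes\n", metadata_size(&leaf) * 2);
        return 0;
}
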
 
 /*
@@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path,
  * in that case this function just updates pointers.
  */
 static int do_relocation(struct btrfs_trans_handle *trans,
+                        struct reloc_control *rc,
                         struct backref_node *node,
                         struct btrfs_key *key,
                         struct btrfs_path *path, int lowest)
@@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans,
        BUG_ON(lowest && node->eb);
 
        path->lowest_level = node->level + 1;
+       rc->backref_cache.path[node->level] = node;
        list_for_each_entry(edge, &node->upper, list[LOWER]) {
                cond_resched();
-               if (node->eb && node->eb->start == edge->blockptr)
-                       continue;
 
                upper = edge->node[UPPER];
-               root = select_reloc_root(trans, upper, edges, &nr);
-               if (!root)
-                       continue;
-
-               if (upper->eb && !upper->locked)
+               root = select_reloc_root(trans, rc, upper, edges, &nr);
+               BUG_ON(!root);
+
+               if (upper->eb && !upper->locked) {
+                       if (!lowest) {
+                               ret = btrfs_bin_search(upper->eb, key,
+                                                      upper->level, &slot);
+                               BUG_ON(ret);
+                               bytenr = btrfs_node_blockptr(upper->eb, slot);
+                               if (node->eb->start == bytenr)
+                                       goto next;
+                       }
                        drop_node_buffer(upper);
+               }
 
                if (!upper->eb) {
                        ret = btrfs_search_slot(trans, root, key, path, 0, 1);
@@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                        }
                        BUG_ON(ret > 0);
 
-                       slot = path->slots[upper->level];
+                       if (!upper->eb) {
+                               upper->eb = path->nodes[upper->level];
+                               path->nodes[upper->level] = NULL;
+                       } else {
+                               BUG_ON(upper->eb != path->nodes[upper->level]);
+                       }
 
-                       btrfs_unlock_up_safe(path, upper->level + 1);
-                       grab_path_buffers(path, upper, edges, nr);
+                       upper->locked = 1;
+                       path->locks[upper->level] = 0;
 
+                       slot = path->slots[upper->level];
                        btrfs_release_path(NULL, path);
                } else {
                        ret = btrfs_bin_search(upper->eb, key, upper->level,
@@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                }
 
                bytenr = btrfs_node_blockptr(upper->eb, slot);
-               if (!lowest) {
-                       if (node->eb->start == bytenr) {
-                               btrfs_tree_unlock(upper->eb);
-                               upper->locked = 0;
-                               continue;
-                       }
+               if (lowest) {
+                       BUG_ON(bytenr != node->bytenr);
                } else {
-                       BUG_ON(node->bytenr != bytenr);
+                       if (node->eb->start == bytenr)
+                               goto next;
                }
 
                blocksize = btrfs_level_size(root, node->level);
@@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                if (!node->eb) {
                        ret = btrfs_cow_block(trans, root, eb, upper->eb,
                                              slot, &eb);
+                       btrfs_tree_unlock(eb);
+                       free_extent_buffer(eb);
                        if (ret < 0) {
                                err = ret;
-                               break;
+                               goto next;
                        }
-                       btrfs_set_lock_blocking(eb);
-                       node->eb = eb;
-                       node->locked = 1;
+                       BUG_ON(node->eb != eb);
                } else {
                        btrfs_set_node_blockptr(upper->eb, slot,
                                                node->eb->start);
@@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                        ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
                        BUG_ON(ret);
                }
-               if (!lowest) {
-                       btrfs_tree_unlock(upper->eb);
-                       upper->locked = 0;
-               }
+next:
+               if (!upper->pending)
+                       drop_node_buffer(upper);
+               else
+                       unlock_node_buffer(upper);
+               if (err)
+                       break;
+       }
+
+       if (!err && node->pending) {
+               drop_node_buffer(node);
+               list_move_tail(&node->list, &rc->backref_cache.changed);
+               node->pending = 0;
        }
+
        path->lowest_level = 0;
+       BUG_ON(err == -ENOSPC);
        return err;
 }
 
 static int link_to_upper(struct btrfs_trans_handle *trans,
+                        struct reloc_control *rc,
                         struct backref_node *node,
                         struct btrfs_path *path)
 {
        struct btrfs_key key;
-       if (!node->eb || list_empty(&node->upper))
-               return 0;
 
        btrfs_node_key_to_cpu(node->eb, &key, 0);
-       return do_relocation(trans, node, &key, path, 0);
+       return do_relocation(trans, rc, node, &key, path, 0);
 }
 
 static int finish_pending_nodes(struct btrfs_trans_handle *trans,
-                               struct backref_cache *cache,
-                               struct btrfs_path *path)
+                               struct reloc_control *rc,
+                               struct btrfs_path *path, int err)
 {
+       LIST_HEAD(list);
+       struct backref_cache *cache = &rc->backref_cache;
        struct backref_node *node;
        int level;
        int ret;
-       int err = 0;
 
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                while (!list_empty(&cache->pending[level])) {
                        node = list_entry(cache->pending[level].next,
-                                         struct backref_node, lower);
-                       BUG_ON(node->level != level);
+                                         struct backref_node, list);
+                       list_move_tail(&node->list, &list);
+                       BUG_ON(!node->pending);
 
-                       ret = link_to_upper(trans, node, path);
-                       if (ret < 0)
-                               err = ret;
-                       /*
-                        * this remove the node from the pending list and
-                        * may add some other nodes to the level + 1
-                        * pending list
-                        */
-                       remove_backref_node(cache, node);
+                       if (!err) {
+                               ret = link_to_upper(trans, rc, node, path);
+                               if (ret < 0)
+                                       err = ret;
+                       }
                }
+               list_splice_init(&list, &cache->pending[level]);
        }
-       BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
        return err;
 }
 
 static void mark_block_processed(struct reloc_control *rc,
-                                struct backref_node *node)
+                                u64 bytenr, u32 blocksize)
+{
+       set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
+                       EXTENT_DIRTY, GFP_NOFS);
+}
+
+static void __mark_block_processed(struct reloc_control *rc,
+                                  struct backref_node *node)
 {
        u32 blocksize;
        if (node->level == 0 ||
            in_block_group(node->bytenr, rc->block_group)) {
                blocksize = btrfs_level_size(rc->extent_root, node->level);
-               set_extent_bits(&rc->processed_blocks, node->bytenr,
-                               node->bytenr + blocksize - 1, EXTENT_DIRTY,
-                               GFP_NOFS);
+               mark_block_processed(rc, node->bytenr, blocksize);
        }
        node->processed = 1;
 }
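
Processed blocks are recorded as EXTENT_DIRTY ranges in rc->processed_blocks, which mark_block_processed() sets and tree_block_processed() tests. A plain bitmap stands in for the extent-state tree in this model:

#include <stdio.h>
#include <string.h>

#define SPACE 64                       /* tracked in blocks, for brevity */
static unsigned char processed[SPACE];

static void mark_processed(int start, int nblocks)
{
        memset(processed + start, 1, nblocks);
}

static int block_processed(int start, int nblocks)
{
        int i;
        for (i = 0; i < nblocks; i++)
                if (!processed[start + i])
                        return 0;
        return 1;
}

int main(void)
{
        mark_processed(8, 4);
        printf("8..11 processed: %d\n", block_processed(8, 4));
        printf("12 processed:    %d\n", block_processed(12, 1));
        return 0;
}
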
@@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc,
                        if (next->processed)
                                break;
 
-                       mark_block_processed(rc, next);
+                       __mark_block_processed(rc, next);
 
                        if (list_empty(&next->upper))
                                break;
@@ -2202,138 +2658,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
        return 0;
 }
 
-/*
- * check if there are any file extent pointers in the leaf point to
- * data require processing
- */
-static int check_file_extents(struct reloc_control *rc,
-                             u64 bytenr, u32 blocksize, u64 ptr_gen)
-{
-       struct btrfs_key found_key;
-       struct btrfs_file_extent_item *fi;
-       struct extent_buffer *leaf;
-       u32 nritems;
-       int i;
-       int ret = 0;
-
-       leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
-
-       nritems = btrfs_header_nritems(leaf);
-       for (i = 0; i < nritems; i++) {
-               cond_resched();
-               btrfs_item_key_to_cpu(leaf, &found_key, i);
-               if (found_key.type != BTRFS_EXTENT_DATA_KEY)
-                       continue;
-               fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
-               if (btrfs_file_extent_type(leaf, fi) ==
-                   BTRFS_FILE_EXTENT_INLINE)
-                       continue;
-               bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
-               if (bytenr == 0)
-                       continue;
-               if (in_block_group(bytenr, rc->block_group)) {
-                       ret = 1;
-                       break;
-               }
-       }
-       free_extent_buffer(leaf);
-       return ret;
-}
-
-/*
- * scan child blocks of a given block to find blocks require processing
- */
-static int add_child_blocks(struct btrfs_trans_handle *trans,
-                           struct reloc_control *rc,
-                           struct backref_node *node,
-                           struct rb_root *blocks)
-{
-       struct tree_block *block;
-       struct rb_node *rb_node;
-       u64 bytenr;
-       u64 ptr_gen;
-       u32 blocksize;
-       u32 nritems;
-       int i;
-       int err = 0;
-
-       nritems = btrfs_header_nritems(node->eb);
-       blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
-       for (i = 0; i < nritems; i++) {
-               cond_resched();
-               bytenr = btrfs_node_blockptr(node->eb, i);
-               ptr_gen = btrfs_node_ptr_generation(node->eb, i);
-               if (ptr_gen == trans->transid)
-                       continue;
-               if (!in_block_group(bytenr, rc->block_group) &&
-                   (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
-                       continue;
-               if (tree_block_processed(bytenr, blocksize, rc))
-                       continue;
-
-               readahead_tree_block(rc->extent_root,
-                                    bytenr, blocksize, ptr_gen);
-       }
-
-       for (i = 0; i < nritems; i++) {
-               cond_resched();
-               bytenr = btrfs_node_blockptr(node->eb, i);
-               ptr_gen = btrfs_node_ptr_generation(node->eb, i);
-               if (ptr_gen == trans->transid)
-                       continue;
-               if (!in_block_group(bytenr, rc->block_group) &&
-                   (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
-                       continue;
-               if (tree_block_processed(bytenr, blocksize, rc))
-                       continue;
-               if (!in_block_group(bytenr, rc->block_group) &&
-                   !check_file_extents(rc, bytenr, blocksize, ptr_gen))
-                       continue;
-
-               block = kmalloc(sizeof(*block), GFP_NOFS);
-               if (!block) {
-                       err = -ENOMEM;
-                       break;
-               }
-               block->bytenr = bytenr;
-               btrfs_node_key_to_cpu(node->eb, &block->key, i);
-               block->level = node->level - 1;
-               block->key_ready = 1;
-               rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
-               BUG_ON(rb_node);
-       }
-       if (err)
-               free_block_list(blocks);
-       return err;
-}
-
-/*
- * find adjacent blocks require processing
- */
-static noinline_for_stack
-int add_adjacent_blocks(struct btrfs_trans_handle *trans,
-                       struct reloc_control *rc,
-                       struct backref_cache *cache,
-                       struct rb_root *blocks, int level,
-                       struct backref_node **upper)
-{
-       struct backref_node *node;
-       int ret = 0;
-
-       WARN_ON(!list_empty(&cache->pending[level]));
-
-       if (list_empty(&cache->pending[level + 1]))
-               return 1;
-
-       node = list_entry(cache->pending[level + 1].next,
-                         struct backref_node, lower);
-       if (node->eb)
-               ret = add_child_blocks(trans, rc, node, blocks);
-
-       *upper = node;
-       return ret;
-}
-
 static int get_tree_block_key(struct reloc_control *rc,
                              struct tree_block *block)
 {
@@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
                                struct btrfs_path *path)
 {
        struct btrfs_root *root;
-       int ret;
+       int release = 0;
+       int ret = 0;
 
+       if (!node)
+               return 0;
+
+       BUG_ON(node->processed);
        root = select_one_root(trans, node);
-       if (unlikely(!root)) {
-               rc->found_old_snapshot = 1;
+       if (root == ERR_PTR(-ENOENT)) {
                update_processed_blocks(rc, node);
-               return 0;
+               goto out;
        }
 
-       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
-               ret = do_relocation(trans, node, key, path, 1);
-               if (ret < 0)
-                       goto out;
-               if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
-                       ret = replace_file_extents(trans, rc, root,
-                                                  node->eb, NULL);
-                       if (ret < 0)
-                               goto out;
-               }
-               drop_node_buffer(node);
-       } else if (!root->ref_cows) {
-               path->lowest_level = node->level;
-               ret = btrfs_search_slot(trans, root, key, path, 0, 1);
-               btrfs_release_path(root, path);
-               if (ret < 0)
+       if (!root || root->ref_cows) {
+               ret = reserve_metadata_space(trans, rc, node);
+               if (ret)
                        goto out;
-       } else if (root != node->root) {
-               WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
+               release = 1;
        }
 
-       update_processed_blocks(rc, node);
-       ret = 0;
+       if (root) {
+               if (root->ref_cows) {
+                       BUG_ON(node->new_bytenr);
+                       BUG_ON(!list_empty(&node->list));
+                       btrfs_record_root_in_trans(trans, root);
+                       root = root->reloc_root;
+                       node->new_bytenr = root->node->start;
+                       node->root = root;
+                       list_add_tail(&node->list, &rc->backref_cache.changed);
+               } else {
+                       path->lowest_level = node->level;
+                       ret = btrfs_search_slot(trans, root, key, path, 0, 1);
+                       btrfs_release_path(root, path);
+                       if (ret > 0)
+                               ret = 0;
+               }
+               if (!ret)
+                       update_processed_blocks(rc, node);
+       } else {
+               ret = do_relocation(trans, rc, node, key, path, 1);
+       }
 out:
-       drop_node_buffer(node);
+       if (ret || node->level == 0 || node->cowonly) {
+               if (release)
+                       release_metadata_space(rc, node);
+               remove_backref_node(&rc->backref_cache, node);
+       }
        return ret;
 }
 
@@ -2415,12 +2752,10 @@ static noinline_for_stack
 int relocate_tree_blocks(struct btrfs_trans_handle *trans,
                         struct reloc_control *rc, struct rb_root *blocks)
 {
-       struct backref_cache *cache;
        struct backref_node *node;
        struct btrfs_path *path;
        struct tree_block *block;
        struct rb_node *rb_node;
-       int level = -1;
        int ret;
        int err = 0;
 
@@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       cache = kmalloc(sizeof(*cache), GFP_NOFS);
-       if (!cache) {
-               btrfs_free_path(path);
-               return -ENOMEM;
-       }
-
-       backref_cache_init(cache);
-
        rb_node = rb_first(blocks);
        while (rb_node) {
                block = rb_entry(rb_node, struct tree_block, rb_node);
-               if (level == -1)
-                       level = block->level;
-               else
-                       BUG_ON(level != block->level);
                if (!block->key_ready)
                        reada_tree_block(rc, block);
                rb_node = rb_next(rb_node);
@@ -2452,100 +2775,83 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
        while (rb_node) {
                block = rb_entry(rb_node, struct tree_block, rb_node);
                if (!block->key_ready)
-                       get_tree_block_key(rc, block);
-               rb_node = rb_next(rb_node);
-       }
-
-       rb_node = rb_first(blocks);
-       while (rb_node) {
-               block = rb_entry(rb_node, struct tree_block, rb_node);
-
-               node = build_backref_tree(rc, cache, &block->key,
-                                         block->level, block->bytenr);
-               if (IS_ERR(node)) {
-                       err = PTR_ERR(node);
-                       goto out;
-               }
-
-               ret = relocate_tree_block(trans, rc, node, &block->key,
-                                         path);
-               if (ret < 0) {
-                       err = ret;
-                       goto out;
-               }
-               remove_backref_node(cache, node);
-               rb_node = rb_next(rb_node);
-       }
-
-       if (level > 0)
-               goto out;
-
-       free_block_list(blocks);
-
-       /*
-        * now backrefs of some upper level tree blocks have been cached,
-        * try relocating blocks referenced by these upper level blocks.
-        */
-       while (1) {
-               struct backref_node *upper = NULL;
-               if (trans->transaction->in_commit ||
-                   trans->transaction->delayed_refs.flushing)
-                       break;
-
-               ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
-                                         &upper);
-               if (ret < 0)
-                       err = ret;
-               if (ret != 0)
-                       break;
+                       get_tree_block_key(rc, block);
+               rb_node = rb_next(rb_node);
+       }
 
-               rb_node = rb_first(blocks);
-               while (rb_node) {
-                       block = rb_entry(rb_node, struct tree_block, rb_node);
-                       if (trans->transaction->in_commit ||
-                           trans->transaction->delayed_refs.flushing)
-                               goto out;
-                       BUG_ON(!block->key_ready);
-                       node = build_backref_tree(rc, cache, &block->key,
-                                                 level, block->bytenr);
-                       if (IS_ERR(node)) {
-                               err = PTR_ERR(node);
-                               goto out;
-                       }
+       rb_node = rb_first(blocks);
+       while (rb_node) {
+               block = rb_entry(rb_node, struct tree_block, rb_node);
 
-                       ret = relocate_tree_block(trans, rc, node,
-                                                 &block->key, path);
-                       if (ret < 0) {
-                               err = ret;
-                               goto out;
-                       }
-                       remove_backref_node(cache, node);
-                       rb_node = rb_next(rb_node);
+               node = build_backref_tree(rc, &block->key,
+                                         block->level, block->bytenr);
+               if (IS_ERR(node)) {
+                       err = PTR_ERR(node);
+                       goto out;
                }
-               free_block_list(blocks);
 
-               if (upper) {
-                       ret = link_to_upper(trans, upper, path);
-                       if (ret < 0) {
+               ret = relocate_tree_block(trans, rc, node, &block->key,
+                                         path);
+               if (ret < 0) {
+                       if (ret != -EAGAIN || rb_node == rb_first(blocks))
                                err = ret;
-                               break;
-                       }
-                       remove_backref_node(cache, upper);
+                       goto out;
                }
+               rb_node = rb_next(rb_node);
        }
 out:
        free_block_list(blocks);
+       err = finish_pending_nodes(trans, rc, path, err);
 
-       ret = finish_pending_nodes(trans, cache, path);
-       if (ret < 0)
-               err = ret;
-
-       kfree(cache);
        btrfs_free_path(path);
        return err;
 }
 
 static noinline_for_stack
+int prealloc_file_extent_cluster(struct inode *inode,
+                                struct file_extent_cluster *cluster)
+{
+       u64 alloc_hint = 0;
+       u64 start;
+       u64 end;
+       u64 offset = BTRFS_I(inode)->index_cnt;
+       u64 num_bytes;
+       int nr = 0;
+       int ret = 0;
+
+       BUG_ON(cluster->start != cluster->boundary[0]);
+       mutex_lock(&inode->i_mutex);
+
+       ret = btrfs_check_data_free_space(inode, cluster->end +
+                                         1 - cluster->start);
+       if (ret)
+               goto out;
+
+       while (nr < cluster->nr) {
+               start = cluster->boundary[nr] - offset;
+               if (nr + 1 < cluster->nr)
+                       end = cluster->boundary[nr + 1] - 1 - offset;
+               else
+                       end = cluster->end - offset;
+
+               lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+               num_bytes = end + 1 - start;
+               ret = btrfs_prealloc_file_range(inode, 0, start,
+                                               num_bytes, num_bytes,
+                                               end + 1, &alloc_hint);
+               unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+               if (ret)
+                       break;
+               nr++;
+       }
+       btrfs_free_reserved_data_space(inode, cluster->end +
+                                      1 - cluster->start);
+out:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
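
prealloc_file_extent_cluster() preallocates one file range per cluster boundary, [boundary[nr], boundary[nr+1]) for inner extents and up to cluster->end for the last, all shifted down by the inode's index_cnt offset. The range arithmetic, extracted into a standalone sketch with made-up numbers:

#include <stdio.h>

int main(void)
{
        unsigned long long boundary[] = { 1000, 3000, 7000 };
        unsigned long long end = 9999, offset = 1000;
        int nr, n = 3;

        for (nr = 0; nr < n; nr++) {
                unsigned long long start = boundary[nr] - offset;
                unsigned long long last = (nr + 1 < n)
                        ? boundary[nr + 1] - 1 - offset
                        : end - offset;
                printf("prealloc [%llu, %llu], %llu bytes\n",
                       start, last, last + 1 - start);
        }
        return 0;
}
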
+
+static noinline_for_stack
 int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
                         u64 block_start)
 {
@@ -2588,7 +2894,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
        u64 offset = BTRFS_I(inode)->index_cnt;
        unsigned long index;
        unsigned long last_index;
-       unsigned int dirty_page = 0;
        struct page *page;
        struct file_ra_state *ra;
        int nr = 0;
@@ -2601,21 +2906,24 @@ static int relocate_file_extent_cluster(struct inode *inode,
        if (!ra)
                return -ENOMEM;
 
-       index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
-       last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+       ret = prealloc_file_extent_cluster(inode, cluster);
+       if (ret)
+               goto out;
 
-       mutex_lock(&inode->i_mutex);
+       file_ra_state_init(ra, inode->i_mapping);
 
-       i_size_write(inode, cluster->end + 1 - offset);
        ret = setup_extent_mapping(inode, cluster->start - offset,
                                   cluster->end - offset, cluster->start);
        if (ret)
-               goto out_unlock;
-
-       file_ra_state_init(ra, inode->i_mapping);
+               goto out;
 
-       WARN_ON(cluster->start != cluster->boundary[0]);
+       index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+       last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
        while (index <= last_index) {
+               ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
+               if (ret)
+                       goto out;
+
                page = find_lock_page(inode->i_mapping, index);
                if (!page) {
                        page_cache_sync_readahead(inode->i_mapping,
@@ -2623,8 +2931,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
                                                  last_index + 1 - index);
                        page = grab_cache_page(inode->i_mapping, index);
                        if (!page) {
+                               btrfs_delalloc_release_metadata(inode,
+                                                       PAGE_CACHE_SIZE);
                                ret = -ENOMEM;
-                               goto out_unlock;
+                               goto out;
                        }
                }
 
@@ -2640,8 +2950,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
                        if (!PageUptodate(page)) {
                                unlock_page(page);
                                page_cache_release(page);
+                               btrfs_delalloc_release_metadata(inode,
+                                                       PAGE_CACHE_SIZE);
                                ret = -EIO;
-                               goto out_unlock;
+                               goto out;
                        }
                }
 
@@ -2660,10 +2972,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
                                        EXTENT_BOUNDARY, GFP_NOFS);
                        nr++;
                }
-               btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
 
+               btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
                set_page_dirty(page);
-               dirty_page++;
 
                unlock_extent(&BTRFS_I(inode)->io_tree,
                              page_start, page_end, GFP_NOFS);
@@ -2671,20 +2982,11 @@ static int relocate_file_extent_cluster(struct inode *inode,
                page_cache_release(page);
 
                index++;
-               if (nr < cluster->nr &&
-                   page_end + 1 + offset == cluster->boundary[nr]) {
-                       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-                                                          dirty_page);
-                       dirty_page = 0;
-               }
-       }
-       if (dirty_page) {
-               balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-                                                  dirty_page);
+               balance_dirty_pages_ratelimited(inode->i_mapping);
+               btrfs_throttle(BTRFS_I(inode)->root);
        }
        WARN_ON(nr != cluster->nr);
-out_unlock:
-       mutex_unlock(&inode->i_mutex);
+out:
        kfree(ra);
        return ret;
 }
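
The page loop now reserves delalloc metadata per page with btrfs_delalloc_reserve_metadata() and releases it on every early-exit path; once a page is dirtied, the reservation is carried forward by the delalloc machinery. A schematic of that reserve/unwind discipline (helpers invented for the sketch):

#include <stdio.h>

static int reserve_meta(void)   { printf("reserve page meta\n"); return 0; }
static void release_meta(void)  { printf("release page meta\n"); }
static int process_page(int i)  { return i == 2 ? -1 : 0; } /* fail page 2 */

int main(void)
{
        int i, ret = 0;

        for (i = 0; i < 4; i++) {
                if (reserve_meta())
                        break;
                ret = process_page(i);
                if (ret) {
                        release_meta(); /* undo on the failure path */
                        break;
                }
                /* page dirtied: reservation now owned by delalloc */
        }
        printf("ret=%d\n", ret);
        return 0;
}
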
@@ -2870,9 +3172,6 @@ out:
 static int block_use_full_backref(struct reloc_control *rc,
                                  struct extent_buffer *eb)
 {
-       struct btrfs_path *path;
-       struct btrfs_extent_item *ei;
-       struct btrfs_key key;
        u64 flags;
        int ret;
 
@@ -2880,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc,
            btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
                return 1;
 
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-
-       key.objectid = eb->start;
-       key.type = BTRFS_EXTENT_ITEM_KEY;
-       key.offset = eb->len;
-
-       path->search_commit_root = 1;
-       path->skip_locking = 1;
-       ret = btrfs_search_slot(NULL, rc->extent_root,
-                               &key, path, 0, 0);
+       ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
+                                      eb->start, eb->len, NULL, &flags);
        BUG_ON(ret);
 
-       ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                           struct btrfs_extent_item);
-       flags = btrfs_extent_flags(path->nodes[0], ei);
-       BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
        if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
                ret = 1;
        else
                ret = 0;
-       btrfs_free_path(path);
        return ret;
 }
 
@@ -3074,22 +3359,10 @@ int add_data_references(struct reloc_control *rc,
        struct btrfs_extent_inline_ref *iref;
        unsigned long ptr;
        unsigned long end;
-       u32 blocksize;
+       u32 blocksize = btrfs_level_size(rc->extent_root, 0);
        int ret;
        int err = 0;
 
-       ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
-                              extent_key->offset);
-       BUG_ON(ret < 0);
-       if (ret > 0) {
-               /* the relocated data is fragmented */
-               rc->extents_skipped++;
-               btrfs_release_path(rc->extent_root, path);
-               return 0;
-       }
-
-       blocksize = btrfs_level_size(rc->extent_root, 0);
-
        eb = path->nodes[0];
        ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
        end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
@@ -3170,7 +3443,8 @@ int add_data_references(struct reloc_control *rc,
  */
 static noinline_for_stack
 int find_next_extent(struct btrfs_trans_handle *trans,
-                    struct reloc_control *rc, struct btrfs_path *path)
+                    struct reloc_control *rc, struct btrfs_path *path,
+                    struct btrfs_key *extent_key)
 {
        struct btrfs_key key;
        struct extent_buffer *leaf;
@@ -3225,6 +3499,7 @@ next:
                        rc->search_start = end + 1;
                } else {
                        rc->search_start = key.objectid + key.offset;
+                       memcpy(extent_key, &key, sizeof(key));
                        return 0;
                }
        }
@@ -3262,12 +3537,49 @@ static int check_extent_flags(u64 flags)
        return 0;
 }
 
+static noinline_for_stack
+int prepare_to_relocate(struct reloc_control *rc)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
+       if (!rc->block_rsv)
+               return -ENOMEM;
+
+       /*
+        * reserve some space for creating reloc trees.
+        * btrfs_init_reloc_root will use it when there
+        * is no reservation in the transaction handle.
+        */
+       ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
+                                 rc->extent_root->nodesize * 256,
+                                 &rc->block_rsv_retries);
+       if (ret)
+               return ret;
+
+       rc->block_rsv->refill_used = 1;
+       btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
+
+       memset(&rc->cluster, 0, sizeof(rc->cluster));
+       rc->search_start = rc->block_group->key.objectid;
+       rc->extents_found = 0;
+       rc->nodes_relocated = 0;
+       rc->merging_rsv_size = 0;
+       rc->block_rsv_retries = 0;
+
+       rc->create_reloc_tree = 1;
+       set_reloc_control(rc);
+
+       trans = btrfs_join_transaction(rc->extent_root, 1);
+       btrfs_commit_transaction(trans, rc->extent_root);
+       return 0;
+}
 
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
        struct rb_root blocks = RB_ROOT;
        struct btrfs_key key;
-       struct file_extent_cluster *cluster;
        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *path;
        struct btrfs_extent_item *ei;
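
For scale, the up-front reservation prepare_to_relocate makes above can be
sketched in plain C; the 4KiB nodesize here is an assumption for
illustration, not something this patch fixes:

        /* illustrative only: size of the reloc-tree reservation */
        static u64 reloc_prepare_reserve(u64 nodesize)
        {
                return nodesize * 256;  /* 4096 * 256 = 1MiB with 4KiB nodes */
        }
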
@@ -3277,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        int ret;
        int err = 0;
 
-       cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
-       if (!cluster)
-               return -ENOMEM;
-
        path = btrfs_alloc_path();
-       if (!path) {
-               kfree(cluster);
+       if (!path)
                return -ENOMEM;
-       }
-
-       rc->extents_found = 0;
-       rc->extents_skipped = 0;
 
-       rc->search_start = rc->block_group->key.objectid;
-       clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
-                         GFP_NOFS);
-
-       rc->create_reloc_root = 1;
-       set_reloc_control(rc);
-
-       trans = btrfs_start_transaction(rc->extent_root, 1);
-       btrfs_commit_transaction(trans, rc->extent_root);
+       ret = prepare_to_relocate(rc);
+       if (ret) {
+               err = ret;
+               goto out_free;
+       }
 
        while (1) {
-               trans = btrfs_start_transaction(rc->extent_root, 1);
+               trans = btrfs_start_transaction(rc->extent_root, 0);
+
+               if (update_backref_cache(trans, &rc->backref_cache)) {
+                       btrfs_end_transaction(trans, rc->extent_root);
+                       continue;
+               }
 
-               ret = find_next_extent(trans, rc, path);
+               ret = find_next_extent(trans, rc, path, &key);
                if (ret < 0)
                        err = ret;
                if (ret != 0)
@@ -3313,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 
                ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                    struct btrfs_extent_item);
-               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-               item_size = btrfs_item_size_nr(path->nodes[0],
-                                              path->slots[0]);
+               item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
                if (item_size >= sizeof(*ei)) {
                        flags = btrfs_extent_flags(path->nodes[0], ei);
                        ret = check_extent_flags(flags);
@@ -3356,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                        ret = add_tree_block(rc, &key, path, &blocks);
                } else if (rc->stage == UPDATE_DATA_PTRS &&
-                        (flags & BTRFS_EXTENT_FLAG_DATA)) {
+                          (flags & BTRFS_EXTENT_FLAG_DATA)) {
                        ret = add_data_references(rc, &key, path, &blocks);
                } else {
                        btrfs_release_path(rc->extent_root, path);
                        ret = 0;
                }
                if (ret < 0) {
-                       err = 0;
+                       err = ret;
                        break;
                }
 
                if (!RB_EMPTY_ROOT(&blocks)) {
                        ret = relocate_tree_blocks(trans, rc, &blocks);
                        if (ret < 0) {
+                               if (ret != -EAGAIN) {
+                                       err = ret;
+                                       break;
+                               }
+                               rc->extents_found--;
+                               rc->search_start = key.objectid;
+                       }
+               }
+
+               ret = btrfs_block_rsv_check(trans, rc->extent_root,
+                                           rc->block_rsv, 0, 5);
+               if (ret < 0) {
+                       if (ret != -EAGAIN) {
                                err = ret;
+                               WARN_ON(1);
                                break;
                        }
+                       rc->commit_transaction = 1;
                }
 
-               nr = trans->blocks_used;
-               btrfs_end_transaction(trans, rc->extent_root);
+               if (rc->commit_transaction) {
+                       rc->commit_transaction = 0;
+                       ret = btrfs_commit_transaction(trans, rc->extent_root);
+                       BUG_ON(ret);
+               } else {
+                       nr = trans->blocks_used;
+                       btrfs_end_transaction_throttle(trans, rc->extent_root);
+                       btrfs_btree_balance_dirty(rc->extent_root, nr);
+               }
                trans = NULL;
-               btrfs_btree_balance_dirty(rc->extent_root, nr);
 
                if (rc->stage == MOVE_DATA_EXTENTS &&
                    (flags & BTRFS_EXTENT_FLAG_DATA)) {
                        rc->found_file_extent = 1;
                        ret = relocate_data_extent(rc->data_inode,
-                                                  &key, cluster);
+                                                  &key, &rc->cluster);
                        if (ret < 0) {
                                err = ret;
                                break;
                        }
                }
        }
-       btrfs_free_path(path);
+
+       btrfs_release_path(rc->extent_root, path);
+       clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
+                         GFP_NOFS);
 
        if (trans) {
                nr = trans->blocks_used;
-               btrfs_end_transaction(trans, rc->extent_root);
+               btrfs_end_transaction_throttle(trans, rc->extent_root);
                btrfs_btree_balance_dirty(rc->extent_root, nr);
        }
 
        if (!err) {
-               ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+               ret = relocate_file_extent_cluster(rc->data_inode,
+                                                  &rc->cluster);
                if (ret < 0)
                        err = ret;
        }
 
-       kfree(cluster);
+       rc->create_reloc_tree = 0;
+       set_reloc_control(rc);
 
-       rc->create_reloc_root = 0;
-       smp_mb();
+       backref_cache_cleanup(&rc->backref_cache);
+       btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
 
-       if (rc->extents_found > 0) {
-               trans = btrfs_start_transaction(rc->extent_root, 1);
-               btrfs_commit_transaction(trans, rc->extent_root);
-       }
+       err = prepare_to_merge(rc, err);
 
        merge_reloc_roots(rc);
 
+       rc->merge_reloc_tree = 0;
        unset_reloc_control(rc);
+       btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
 
        /* get rid of pinned extents */
-       trans = btrfs_start_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root, 1);
        btrfs_commit_transaction(trans, rc->extent_root);
-
+out_free:
+       btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
+       btrfs_free_path(path);
        return err;
 }
 
@@ -3448,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        btrfs_set_inode_generation(leaf, item, 1);
        btrfs_set_inode_size(leaf, item, 0);
        btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
+       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
+                                         BTRFS_INODE_PREALLOC);
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(root, path);
 out:
@@ -3460,8 +3790,9 @@ out:
  * helper to create inode for data relocation.
  * the inode is in data relocation tree and its link count is 0
  */
-static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
-                                       struct btrfs_block_group_cache *group)
+static noinline_for_stack
+struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_group_cache *group)
 {
        struct inode *inode = NULL;
        struct btrfs_trans_handle *trans;
@@ -3475,8 +3806,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
        if (IS_ERR(root))
                return ERR_CAST(root);
 
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
+       trans = btrfs_start_transaction(root, 6);
+       if (IS_ERR(trans))
+               return ERR_CAST(trans);
 
        err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
        if (err)
@@ -3496,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
 out:
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
-
        btrfs_btree_balance_dirty(root, nr);
        if (err) {
                if (inode)
@@ -3506,6 +3837,21 @@ out:
        return inode;
 }
 
+static struct reloc_control *alloc_reloc_control(void)
+{
+       struct reloc_control *rc;
+
+       rc = kzalloc(sizeof(*rc), GFP_NOFS);
+       if (!rc)
+               return NULL;
+
+       INIT_LIST_HEAD(&rc->reloc_roots);
+       backref_cache_init(&rc->backref_cache);
+       mapping_tree_init(&rc->reloc_root_tree);
+       extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
+       return rc;
+}
+
 /*
  * function to relocate all extents in a block group.
  */
@@ -3514,24 +3860,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
        struct reloc_control *rc;
        int ret;
+       int rw = 0;
        int err = 0;
 
-       rc = kzalloc(sizeof(*rc), GFP_NOFS);
+       rc = alloc_reloc_control();
        if (!rc)
                return -ENOMEM;
 
-       mapping_tree_init(&rc->reloc_root_tree);
-       extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
-       INIT_LIST_HEAD(&rc->reloc_roots);
+       rc->extent_root = extent_root;
 
        rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
        BUG_ON(!rc->block_group);
 
-       btrfs_init_workers(&rc->workers, "relocate",
-                          fs_info->thread_pool_size, NULL);
-
-       rc->extent_root = extent_root;
-       btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
+       if (!rc->block_group->ro) {
+               ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
+               if (ret) {
+                       err = ret;
+                       goto out;
+               }
+               rw = 1;
+       }
 
        rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
        if (IS_ERR(rc->data_inode)) {
@@ -3548,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
 
        while (1) {
-               rc->extents_found = 0;
-               rc->extents_skipped = 0;
-
                mutex_lock(&fs_info->cleaner_mutex);
 
                btrfs_clean_old_snapshots(fs_info->tree_root);
@@ -3559,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                mutex_unlock(&fs_info->cleaner_mutex);
                if (ret < 0) {
                        err = ret;
-                       break;
+                       goto out;
                }
 
                if (rc->extents_found == 0)
@@ -3573,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                        invalidate_mapping_pages(rc->data_inode->i_mapping,
                                                 0, -1);
                        rc->stage = UPDATE_DATA_PTRS;
-               } else if (rc->stage == UPDATE_DATA_PTRS &&
-                          rc->extents_skipped >= rc->extents_found) {
-                       iput(rc->data_inode);
-                       rc->data_inode = create_reloc_inode(fs_info,
-                                                           rc->block_group);
-                       if (IS_ERR(rc->data_inode)) {
-                               err = PTR_ERR(rc->data_inode);
-                               rc->data_inode = NULL;
-                               break;
-                       }
-                       rc->stage = MOVE_DATA_EXTENTS;
-                       rc->found_file_extent = 0;
                }
        }
 
@@ -3597,8 +3930,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        WARN_ON(rc->block_group->reserved > 0);
        WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
 out:
+       if (err && rw)
+               btrfs_set_block_group_rw(extent_root, rc->block_group);
        iput(rc->data_inode);
-       btrfs_stop_workers(&rc->workers);
        btrfs_put_block_group(rc->block_group);
        kfree(rc);
        return err;
@@ -3609,7 +3943,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
        struct btrfs_trans_handle *trans;
        int ret;
 
-       trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
+       trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
 
        memset(&root->root_item.drop_progress, 0,
                sizeof(root->root_item.drop_progress));
@@ -3702,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        if (list_empty(&reloc_roots))
                goto out;
 
-       rc = kzalloc(sizeof(*rc), GFP_NOFS);
+       rc = alloc_reloc_control();
        if (!rc) {
                err = -ENOMEM;
                goto out;
        }
 
-       mapping_tree_init(&rc->reloc_root_tree);
-       INIT_LIST_HEAD(&rc->reloc_roots);
-       btrfs_init_workers(&rc->workers, "relocate",
-                          root->fs_info->thread_pool_size, NULL);
        rc->extent_root = root->fs_info->extent_root;
 
        set_reloc_control(rc);
 
+       trans = btrfs_join_transaction(rc->extent_root, 1);
+
+       rc->merge_reloc_tree = 1;
+
        while (!list_empty(&reloc_roots)) {
                reloc_root = list_entry(reloc_roots.next,
                                        struct btrfs_root, root_list);
@@ -3735,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
                fs_root->reloc_root = reloc_root;
        }
 
-       trans = btrfs_start_transaction(rc->extent_root, 1);
        btrfs_commit_transaction(trans, rc->extent_root);
 
        merge_reloc_roots(rc);
 
        unset_reloc_control(rc);
 
-       trans = btrfs_start_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root, 1);
        btrfs_commit_transaction(trans, rc->extent_root);
 out:
-       if (rc) {
-               btrfs_stop_workers(&rc->workers);
-               kfree(rc);
-       }
+       kfree(rc);
        while (!list_empty(&reloc_roots)) {
                reloc_root = list_entry(reloc_roots.next,
                                        struct btrfs_root, root_list);
@@ -3814,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
        btrfs_put_ordered_extent(ordered);
        return 0;
 }
+
+void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root, struct extent_buffer *buf,
+                          struct extent_buffer *cow)
+{
+       struct reloc_control *rc;
+       struct backref_node *node;
+       int first_cow = 0;
+       int level;
+       int ret;
+
+       rc = root->fs_info->reloc_ctl;
+       if (!rc)
+               return;
+
+       BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
+              root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
+
+       level = btrfs_header_level(buf);
+       if (btrfs_header_generation(buf) <=
+           btrfs_root_last_snapshot(&root->root_item))
+               first_cow = 1;
+
+       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
+           rc->create_reloc_tree) {
+               WARN_ON(!first_cow && level == 0);
+
+               node = rc->backref_cache.path[level];
+               BUG_ON(node->bytenr != buf->start &&
+                      node->new_bytenr != buf->start);
+
+               drop_node_buffer(node);
+               extent_buffer_get(cow);
+               node->eb = cow;
+               node->new_bytenr = cow->start;
+
+               if (!node->pending) {
+                       list_move_tail(&node->list,
+                                      &rc->backref_cache.pending[level]);
+                       node->pending = 1;
+               }
+
+               if (first_cow)
+                       __mark_block_processed(rc, node);
+
+               if (first_cow && level > 0)
+                       rc->nodes_relocated += buf->len;
+       }
+
+       if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
+               ret = replace_file_extents(trans, rc, root, cow);
+               BUG_ON(ret);
+       }
+}
+
+/*
+ * called before creating a snapshot. it calculates the metadata reservation
+ * required for relocating tree blocks in the snapshot
+ */
+void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
+                             struct btrfs_pending_snapshot *pending,
+                             u64 *bytes_to_reserve)
+{
+       struct btrfs_root *root;
+       struct reloc_control *rc;
+
+       root = pending->root;
+       if (!root->reloc_root)
+               return;
+
+       rc = root->fs_info->reloc_ctl;
+       if (!rc->merge_reloc_tree)
+               return;
+
+       root = root->reloc_root;
+       BUG_ON(btrfs_root_refs(&root->root_item) == 0);
+       /*
+        * relocation is in the stage of merging trees. the space
+        * used by merging a reloc tree is twice the size of
+        * relocated tree nodes in the worst case. half for cowing
+        * the reloc tree, half for cowing the fs tree. the space
+        * used by cowing the reloc tree will be freed after the
+        * tree is dropped. if we create a snapshot, cowing the fs
+        * tree may use more space than it frees. so we need to
+        * reserve extra space.
+        */
+       *bytes_to_reserve += rc->nodes_relocated;
+}
+
+/*
+ * called after the snapshot is created. migrate the block reservation
+ * and create a reloc root for the newly created snapshot
+ */
+void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+                              struct btrfs_pending_snapshot *pending)
+{
+       struct btrfs_root *root = pending->root;
+       struct btrfs_root *reloc_root;
+       struct btrfs_root *new_root;
+       struct reloc_control *rc;
+       int ret;
+
+       if (!root->reloc_root)
+               return;
+
+       rc = root->fs_info->reloc_ctl;
+       rc->merging_rsv_size += rc->nodes_relocated;
+
+       if (rc->merge_reloc_tree) {
+               ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+                                             rc->block_rsv,
+                                             rc->nodes_relocated);
+               BUG_ON(ret);
+       }
+
+       new_root = pending->snap;
+       reloc_root = create_reloc_root(trans, root->reloc_root,
+                                      new_root->root_key.objectid);
+
+       __add_reloc_root(reloc_root);
+       new_root->reloc_root = reloc_root;
+
+       if (rc->create_reloc_tree) {
+               ret = clone_backref_node(trans, rc, root, reloc_root);
+               BUG_ON(ret);
+       }
+}
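
The comment in btrfs_reloc_pre_snapshot above compresses the worst-case
accounting; a hedged restatement in plain C (helper names are local to this
sketch, not from the patch):

        /* merging COWs each relocated node twice: once in the reloc tree
         * (freed again when the reloc tree is dropped) and once in the
         * fs tree */
        static u64 reloc_merge_cost(u64 nodes_relocated)
        {
                return 2 * nodes_relocated;
        }

        /* a snapshot adds one more fs-tree COW pass that may not free
         * space, hence *bytes_to_reserve += rc->nodes_relocated above */
        static u64 snapshot_extra_reserve(u64 nodes_relocated)
        {
                return nodes_relocated;
        }
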
index 67fa2d2..b91ccd9 100644 (file)
@@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
        struct extent_buffer *leaf;
        struct btrfs_path *path;
        struct btrfs_key key;
+       struct btrfs_key root_key;
+       struct btrfs_root *root;
        int err = 0;
        int ret;
 
@@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
        key.type = BTRFS_ORPHAN_ITEM_KEY;
        key.offset = 0;
 
+       root_key.type = BTRFS_ROOT_ITEM_KEY;
+       root_key.offset = (u64)-1;
+
        while (1) {
                ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
                if (ret < 0) {
@@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                    key.type != BTRFS_ORPHAN_ITEM_KEY)
                        break;
 
-               ret = btrfs_find_dead_roots(tree_root, key.offset);
-               if (ret) {
+               root_key.objectid = key.offset;
+               key.offset++;
+
+               root = btrfs_read_fs_root_no_name(tree_root->fs_info,
+                                                 &root_key);
+               if (!IS_ERR(root))
+                       continue;
+
+               ret = PTR_ERR(root);
+               if (ret != -ENOENT) {
                        err = ret;
                        break;
                }
 
-               key.offset++;
+               ret = btrfs_find_dead_roots(tree_root, root_key.objectid);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
        }
 
        btrfs_free_path(path);
index 2909a03..d34b2df 100644 (file)
@@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        btrfs_start_delalloc_inodes(root, 0);
        btrfs_wait_ordered_extents(root, 0, 0);
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
        ret = btrfs_commit_transaction(trans, root);
        return ret;
 }
@@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
                        return -EINVAL;
 
-               /* recover relocation */
-               ret = btrfs_recover_relocation(root);
+               ret = btrfs_cleanup_fs_roots(root->fs_info);
                WARN_ON(ret);
 
-               ret = btrfs_cleanup_fs_roots(root->fs_info);
+               /* recover relocation */
+               ret = btrfs_recover_relocation(root);
                WARN_ON(ret);
 
                sb->s_flags &= ~MS_RDONLY;
@@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct list_head *head = &root->fs_info->space_info;
        struct btrfs_space_info *found;
        u64 total_used = 0;
-       u64 data_used = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)root->fs_info->fsid;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(found, head, list) {
-               if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
-                                   BTRFS_BLOCK_GROUP_RAID10|
-                                   BTRFS_BLOCK_GROUP_RAID1)) {
-                       total_used += found->bytes_used;
-                       if (found->flags & BTRFS_BLOCK_GROUP_DATA)
-                               data_used += found->bytes_used;
-                       else
-                               data_used += found->total_bytes;
-               }
-
-               total_used += found->bytes_used;
-               if (found->flags & BTRFS_BLOCK_GROUP_DATA)
-                       data_used += found->bytes_used;
-               else
-                       data_used += found->total_bytes;
-       }
+       list_for_each_entry_rcu(found, head, list)
+               total_used += found->disk_used;
        rcu_read_unlock();
 
        buf->f_namelen = BTRFS_NAME_LEN;
        buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
        buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bavail = buf->f_blocks - (data_used >> bits);
+       buf->f_bavail = buf->f_bfree;
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_type = BTRFS_SUPER_MAGIC;
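
A hedged worked example of the simplified statfs accounting above; the
numbers are invented for illustration:

        /* 1TiB filesystem, 300GiB summed disk_used, 4KiB blocks (bits == 12) */
        u64 total_bytes = 1ULL << 40;
        u64 total_used  = 300ULL << 30;
        u64 f_blocks = total_bytes >> 12;               /* 268435456 */
        u64 f_bfree  = f_blocks - (total_used >> 12);   /* 189792256 */
        u64 f_bavail = f_bfree;         /* f_bavail now mirrors f_bfree */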
 
index 2cb1160..66e4c66 100644 (file)
@@ -165,54 +165,89 @@ enum btrfs_trans_type {
        TRANS_USERSPACE,
 };
 
+static int may_wait_transaction(struct btrfs_root *root, int type)
+{
+       if (!root->fs_info->log_root_recovering &&
+           ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
+            type == TRANS_USERSPACE))
+               return 1;
+       return 0;
+}
+
 static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
-                                            int num_blocks, int type)
+                                                   u64 num_items, int type)
 {
-       struct btrfs_trans_handle *h =
-               kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+       struct btrfs_trans_handle *h;
+       struct btrfs_transaction *cur_trans;
+       int retries = 0;
        int ret;
+again:
+       h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+       if (!h)
+               return ERR_PTR(-ENOMEM);
 
        mutex_lock(&root->fs_info->trans_mutex);
-       if (!root->fs_info->log_root_recovering &&
-           ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
-            type == TRANS_USERSPACE))
+       if (may_wait_transaction(root, type))
                wait_current_trans(root);
+
        ret = join_transaction(root);
        BUG_ON(ret);
 
-       h->transid = root->fs_info->running_transaction->transid;
-       h->transaction = root->fs_info->running_transaction;
-       h->blocks_reserved = num_blocks;
+       cur_trans = root->fs_info->running_transaction;
+       cur_trans->use_count++;
+       mutex_unlock(&root->fs_info->trans_mutex);
+
+       h->transid = cur_trans->transid;
+       h->transaction = cur_trans;
        h->blocks_used = 0;
        h->block_group = 0;
-       h->alloc_exclude_nr = 0;
-       h->alloc_exclude_start = 0;
+       h->bytes_reserved = 0;
        h->delayed_ref_updates = 0;
+       h->block_rsv = NULL;
 
-       if (!current->journal_info && type != TRANS_USERSPACE)
-               current->journal_info = h;
+       smp_mb();
+       if (cur_trans->blocked && may_wait_transaction(root, type)) {
+               btrfs_commit_transaction(h, root);
+               goto again;
+       }
+
+       if (num_items > 0) {
+               ret = btrfs_trans_reserve_metadata(h, root, num_items,
+                                                  &retries);
+               if (ret == -EAGAIN) {
+                       btrfs_commit_transaction(h, root);
+                       goto again;
+               }
+               if (ret < 0) {
+                       btrfs_end_transaction(h, root);
+                       return ERR_PTR(ret);
+               }
+       }
 
-       root->fs_info->running_transaction->use_count++;
+       mutex_lock(&root->fs_info->trans_mutex);
        record_root_in_trans(h, root);
        mutex_unlock(&root->fs_info->trans_mutex);
+
+       if (!current->journal_info && type != TRANS_USERSPACE)
+               current->journal_info = h;
        return h;
 }
 
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
-                                                  int num_blocks)
+                                                  int num_items)
 {
-       return start_transaction(root, num_blocks, TRANS_START);
+       return start_transaction(root, num_items, TRANS_START);
 }
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
                                                   int num_blocks)
 {
-       return start_transaction(root, num_blocks, TRANS_JOIN);
+       return start_transaction(root, 0, TRANS_JOIN);
 }
 
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks)
 {
-       return start_transaction(r, num_blocks, TRANS_USERSPACE);
+       return start_transaction(r, 0, TRANS_USERSPACE);
 }
 
 /* wait for a transaction commit to be fully complete */
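
Since btrfs_start_transaction now reserves metadata for num_items items and
can fail, callers check for ERR_PTR instead of assuming success; a minimal
hedged sketch (the function name is made up; the same shape appears in the
btrfs_defrag_root conversion later in this patch):

        static int example_update_one_item(struct btrfs_root *root)
        {
                struct btrfs_trans_handle *trans;

                trans = btrfs_start_transaction(root, 1); /* room for 1 item */
                if (IS_ERR(trans))
                        return PTR_ERR(trans);  /* e.g. -ENOMEM or -ENOSPC */
                /* ... modify the tree under the reservation ... */
                return btrfs_end_transaction(trans, root);
        }
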
@@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root)
        mutex_unlock(&root->fs_info->trans_mutex);
 }
 
+static int should_end_transaction(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root)
+{
+       int ret;
+       ret = btrfs_block_rsv_check(trans, root,
+                                   &root->fs_info->global_block_rsv, 0, 5);
+       return ret ? 1 : 0;
+}
+
+int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       int updates;
+
+       if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
+               return 1;
+
+       updates = trans->delayed_ref_updates;
+       trans->delayed_ref_updates = 0;
+       if (updates)
+               btrfs_run_delayed_refs(trans, root, updates);
+
+       return should_end_transaction(trans, root);
+}
+
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, int throttle)
 {
-       struct btrfs_transaction *cur_trans;
+       struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
        int count = 0;
 
@@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                count++;
        }
 
+       btrfs_trans_release_metadata(trans, root);
+
+       if (!root->fs_info->open_ioctl_trans &&
+           should_end_transaction(trans, root))
+               trans->transaction->blocked = 1;
+
+       if (cur_trans->blocked && !cur_trans->in_commit) {
+               if (throttle)
+                       return btrfs_commit_transaction(trans, root);
+               else
+                       wake_up_process(info->transaction_kthread);
+       }
+
        mutex_lock(&info->trans_mutex);
-       cur_trans = info->running_transaction;
-       WARN_ON(cur_trans != trans->transaction);
+       WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;
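
The new btrfs_should_end_transaction helper (exported in transaction.h below)
lets long-running operations yield once the global reservation runs low or a
commit is pending; a hedged sketch of the intended loop shape, with
illustrative loop variables:

        while (have_more_items) {
                /* ... process one batch of items ... */
                if (btrfs_should_end_transaction(trans, root)) {
                        btrfs_end_transaction(trans, root);
                        trans = btrfs_start_transaction(root, batch_items);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
                }
        }
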
 
@@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 
                        btrfs_free_log(trans, root);
                        btrfs_update_reloc_root(trans, root);
+                       btrfs_orphan_commit_root(trans, root);
 
                        if (root->commit_root != root->node) {
                                switch_commit_root(root);
@@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 {
        struct btrfs_fs_info *info = root->fs_info;
-       int ret;
        struct btrfs_trans_handle *trans;
+       int ret;
        unsigned long nr;
 
-       smp_mb();
-       if (root->defrag_running)
+       if (xchg(&root->defrag_running, 1))
                return 0;
-       trans = btrfs_start_transaction(root, 1);
+
        while (1) {
-               root->defrag_running = 1;
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+
                ret = btrfs_defrag_leaves(trans, root, cacheonly);
+
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();
 
-               trans = btrfs_start_transaction(root, 1);
                if (root->fs_info->closing || ret != -EAGAIN)
                        break;
        }
        root->defrag_running = 0;
-       smp_mb();
-       btrfs_end_transaction(trans, root);
-       return 0;
+       return ret;
 }
 
 #if 0
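
The btrfs_defrag_root rework above also swaps the smp_mb()/flag test for
xchg(), which sets the flag and returns its previous value in one atomic
step; a hedged illustration:

        /* exactly one caller sees the old value 0 and proceeds;
         * concurrent callers see 1 and return immediately */
        if (xchg(&root->defrag_running, 1))
                return 0;
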
@@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        struct btrfs_root *root = pending->root;
        struct btrfs_root *parent_root;
        struct inode *parent_inode;
+       struct dentry *dentry;
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        int ret;
-       u64 objectid;
-       int namelen;
+       int retries = 0;
+       u64 to_reserve = 0;
        u64 index = 0;
-
-       parent_inode = pending->dentry->d_parent->d_inode;
-       parent_root = BTRFS_I(parent_inode)->root;
+       u64 objectid;
 
        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
-               ret = -ENOMEM;
+               pending->error = -ENOMEM;
                goto fail;
        }
+
        ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
-       if (ret)
+       if (ret) {
+               pending->error = ret;
                goto fail;
+       }
+
+       btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
+       btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
+
+       if (to_reserve > 0) {
+               ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
+                                         to_reserve, &retries);
+               if (ret) {
+                       pending->error = ret;
+                       goto fail;
+               }
+       }
 
        key.objectid = objectid;
-       /* record when the snapshot was created in key.offset */
-       key.offset = trans->transid;
-       btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+       key.offset = (u64)-1;
+       key.type = BTRFS_ROOT_ITEM_KEY;
 
-       memcpy(&pending->root_key, &key, sizeof(key));
-       pending->root_key.offset = (u64)-1;
+       trans->block_rsv = &pending->block_rsv;
 
+       dentry = pending->dentry;
+       parent_inode = dentry->d_parent->d_inode;
+       parent_root = BTRFS_I(parent_inode)->root;
        record_root_in_trans(trans, parent_root);
+
        /*
         * insert the directory item
         */
-       namelen = strlen(pending->name);
        ret = btrfs_set_inode_index(parent_inode, &index);
        BUG_ON(ret);
        ret = btrfs_insert_dir_item(trans, parent_root,
-                           pending->name, namelen,
-                           parent_inode->i_ino,
-                           &pending->root_key, BTRFS_FT_DIR, index);
+                               dentry->d_name.name, dentry->d_name.len,
+                               parent_inode->i_ino, &key,
+                               BTRFS_FT_DIR, index);
        BUG_ON(ret);
 
-       btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
+       btrfs_i_size_write(parent_inode, parent_inode->i_size +
+                                        dentry->d_name.len * 2);
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
@@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        free_extent_buffer(old);
 
        btrfs_set_root_node(new_root_item, tmp);
-       ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
-                               new_root_item);
-       BUG_ON(ret);
+       /* record when the snapshot was created in key.offset */
+       key.offset = trans->transid;
+       ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
        btrfs_tree_unlock(tmp);
        free_extent_buffer(tmp);
+       BUG_ON(ret);
 
-       ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
-                                pending->root_key.objectid,
+       /*
+        * insert root back/forward references
+        */
+       ret = btrfs_add_root_ref(trans, tree_root, objectid,
                                 parent_root->root_key.objectid,
-                                parent_inode->i_ino, index, pending->name,
-                                namelen);
+                                parent_inode->i_ino, index,
+                                dentry->d_name.name, dentry->d_name.len);
        BUG_ON(ret);
 
+       key.offset = (u64)-1;
+       pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
+       BUG_ON(IS_ERR(pending->snap));
+
+       btrfs_reloc_post_snapshot(trans, pending);
+       btrfs_orphan_post_snapshot(trans, pending);
 fail:
        kfree(new_root_item);
-       return ret;
+       btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
+       return 0;
 }
 
 /*
@@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
        return ret;
 }
 
+int btrfs_transaction_blocked(struct btrfs_fs_info *info)
+{
+       int ret = 0;
+       spin_lock(&info->new_trans_lock);
+       if (info->running_transaction)
+               ret = info->running_transaction->blocked;
+       spin_unlock(&info->new_trans_lock);
+       return ret;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
@@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = btrfs_run_delayed_refs(trans, root, 0);
        BUG_ON(ret);
 
+       btrfs_trans_release_metadata(trans, root);
+
        cur_trans = trans->transaction;
        /*
         * set the flushing flag so procs in this transaction have to
@@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        snap_pending = 1;
 
                WARN_ON(cur_trans != trans->transaction);
-               prepare_to_wait(&cur_trans->writer_wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
-
                if (cur_trans->num_writers > 1)
                        timeout = MAX_SCHEDULE_TIMEOUT;
                else if (should_grow)
@@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                 */
                btrfs_run_ordered_operations(root, 1);
 
+               prepare_to_wait(&cur_trans->writer_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+
                smp_mb();
                if (cur_trans->num_writers > 1 || should_grow)
                        schedule_timeout(timeout);
@@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 
                if (btrfs_header_backref_rev(root->node) <
                    BTRFS_MIXED_BACKREF_REV)
-                       btrfs_drop_snapshot(root, 0);
+                       btrfs_drop_snapshot(root, NULL, 0);
                else
-                       btrfs_drop_snapshot(root, 1);
+                       btrfs_drop_snapshot(root, NULL, 1);
        }
        return 0;
 }
index 93c7ccb..e104986 100644 (file)
@@ -45,20 +45,23 @@ struct btrfs_transaction {
 
 struct btrfs_trans_handle {
        u64 transid;
+       u64 block_group;
+       u64 bytes_reserved;
        unsigned long blocks_reserved;
        unsigned long blocks_used;
-       struct btrfs_transaction *transaction;
-       u64 block_group;
-       u64 alloc_exclude_start;
-       u64 alloc_exclude_nr;
        unsigned long delayed_ref_updates;
+       struct btrfs_transaction *transaction;
+       struct btrfs_block_rsv *block_rsv;
 };
 
 struct btrfs_pending_snapshot {
        struct dentry *dentry;
        struct btrfs_root *root;
-       char *name;
-       struct btrfs_key root_key;
+       struct btrfs_root *snap;
+       /* block reservation for the operation */
+       struct btrfs_block_rsv block_rsv;
+       /* extra metadata reservation for relocation */
+       int error;
        struct list_head list;
 };
 
@@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
-                                                  int num_blocks);
+                                                  int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
-                                                  int num_blocks);
+                                                 int num_blocks);
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
-                                                  int num_blocks);
+                                                        int num_blocks);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root);
+int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
@@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark);
 int btrfs_wait_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark);
+int btrfs_transaction_blocked(struct btrfs_fs_info *info);
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
 #endif
index b10eacd..f7ac8e0 100644 (file)
@@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                                 path->nodes[1], 0,
                                 cache_only, &last_ret,
                                 &root->defrag_progress);
-       WARN_ON(ret && ret != -EAGAIN);
+       if (ret) {
+               WARN_ON(ret == -EAGAIN);
+               goto out;
+       }
        if (next_key_ret == 0) {
                memcpy(&root->defrag_progress, &key, sizeof(key));
                ret = -EAGAIN;
        }
-
-       btrfs_release_path(root, path);
 out:
        if (path)
                btrfs_free_path(path);
index af57dd2..fb102a9 100644 (file)
@@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root)
 {
        int ret;
+       int err = 0;
 
        mutex_lock(&root->log_mutex);
        if (root->log_root) {
@@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
        mutex_lock(&root->fs_info->tree_log_mutex);
        if (!root->fs_info->log_root_tree) {
                ret = btrfs_init_log_root_tree(trans, root->fs_info);
-               BUG_ON(ret);
+               if (ret)
+                       err = ret;
        }
-       if (!root->log_root) {
+       if (err == 0 && !root->log_root) {
                ret = btrfs_add_log_tree(trans, root);
-               BUG_ON(ret);
+               if (ret)
+                       err = ret;
        }
        mutex_unlock(&root->fs_info->tree_log_mutex);
        root->log_batch++;
        atomic_inc(&root->log_writers);
        mutex_unlock(&root->log_mutex);
-       return 0;
+       return err;
 }
 
 /*
@@ -376,7 +379,7 @@ insert:
                        BUG_ON(ret);
                }
        } else if (ret) {
-               BUG();
+               return ret;
        }
        dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
                                        path->slots[0]);
@@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
 
-               wc->process_func(root, next, wc, ptr_gen);
-
                if (*level == 1) {
+                       wc->process_func(root, next, wc, ptr_gen);
+
                        path->slots[*level]++;
                        if (wc->free) {
                                btrfs_read_buffer(next, ptr_gen);
@@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
-       if (path->nodes[*level] == root->node)
-               parent = path->nodes[*level];
-       else
-               parent = path->nodes[*level + 1];
-
-       bytenr = path->nodes[*level]->start;
-
-       blocksize = btrfs_level_size(root, *level);
-       root_owner = btrfs_header_owner(parent);
-       root_gen = btrfs_header_generation(parent);
-
-       wc->process_func(root, path->nodes[*level], wc,
-                        btrfs_header_generation(path->nodes[*level]));
-
-       if (wc->free) {
-               next = path->nodes[*level];
-               btrfs_tree_lock(next);
-               clean_tree_block(trans, root, next);
-               btrfs_set_lock_blocking(next);
-               btrfs_wait_tree_block_writeback(next);
-               btrfs_tree_unlock(next);
-
-               WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
-               ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
-               BUG_ON(ret);
-       }
-       free_extent_buffer(path->nodes[*level]);
-       path->nodes[*level] = NULL;
-       *level += 1;
+       path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
 
        cond_resched();
        return 0;
@@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
        for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
                slot = path->slots[i];
-               if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
+               if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
                        struct extent_buffer *node;
                        node = path->nodes[i];
                        path->slots[i]++;
@@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&log_root_tree->log_mutex);
 
        ret = update_log_root(trans, log);
-       BUG_ON(ret);
 
        mutex_lock(&log_root_tree->log_mutex);
        if (atomic_dec_and_test(&log_root_tree->log_writers)) {
@@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                        wake_up(&log_root_tree->log_writer_wait);
        }
 
+       if (ret) {
+               BUG_ON(ret != -ENOSPC);
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+               btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+               mutex_unlock(&log_root_tree->log_mutex);
+               ret = -EAGAIN;
+               goto out;
+       }
+
        index2 = log_root_tree->log_transid % 2;
        if (atomic_read(&log_root_tree->log_commit[index2])) {
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
@@ -2129,15 +2112,10 @@ out:
        return 0;
 }
 
-/*
- * free all the extents used by the tree log.  This should be called
- * at commit time of the full transaction
- */
-int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
+static void free_log_tree(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *log)
 {
        int ret;
-       struct btrfs_root *log;
-       struct key;
        u64 start;
        u64 end;
        struct walk_control wc = {
@@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
                .process_func = process_one_buffer
        };
 
-       if (!root->log_root || root->fs_info->log_root_recovering)
-               return 0;
-
-       log = root->log_root;
        ret = walk_log_tree(trans, log, &wc);
        BUG_ON(ret);
 
@@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
                                  EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
        }
 
-       if (log->log_transid > 0) {
-               ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
-                                    &log->root_key);
-               BUG_ON(ret);
-       }
-       root->log_root = NULL;
        free_extent_buffer(log->node);
        kfree(log);
+}
+
+/*
+ * free all the extents used by the tree log.  This should be called
+ * at commit time of the full transaction
+ */
+int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
+{
+       if (root->log_root) {
+               free_log_tree(trans, root->log_root);
+               root->log_root = NULL;
+       }
+       return 0;
+}
+
+int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info)
+{
+       if (fs_info->log_root_tree) {
+               free_log_tree(trans, fs_info->log_root_tree);
+               fs_info->log_root_tree = NULL;
+       }
        return 0;
 }
 
@@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
        struct btrfs_dir_item *di;
        struct btrfs_path *path;
        int ret;
+       int err = 0;
        int bytes_del = 0;
 
        if (BTRFS_I(dir)->logged_trans < trans->transid)
@@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
                                   name, name_len, -1);
-       if (di && !IS_ERR(di)) {
+       if (IS_ERR(di)) {
+               err = PTR_ERR(di);
+               goto fail;
+       }
+       if (di) {
                ret = btrfs_delete_one_dir_name(trans, log, path, di);
                bytes_del += name_len;
                BUG_ON(ret);
@@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
        btrfs_release_path(log, path);
        di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino,
                                         index, name, name_len, -1);
-       if (di && !IS_ERR(di)) {
+       if (IS_ERR(di)) {
+               err = PTR_ERR(di);
+               goto fail;
+       }
+       if (di) {
                ret = btrfs_delete_one_dir_name(trans, log, path, di);
                bytes_del += name_len;
                BUG_ON(ret);
@@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
                btrfs_release_path(log, path);
 
                ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
+               if (ret < 0) {
+                       err = ret;
+                       goto fail;
+               }
                if (ret == 0) {
                        struct btrfs_inode_item *item;
                        u64 i_size;
@@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
                        ret = 0;
                btrfs_release_path(log, path);
        }
-
+fail:
        btrfs_free_path(path);
        mutex_unlock(&BTRFS_I(dir)->log_mutex);
+       if (ret == -ENOSPC) {
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+               ret = 0;
+       }
        btrfs_end_log_trans(root);
 
        return 0;
@@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
        ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
                                  dirid, &index);
        mutex_unlock(&BTRFS_I(inode)->log_mutex);
+       if (ret == -ENOSPC) {
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+               ret = 0;
+       }
        btrfs_end_log_trans(root);
 
        return ret;
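
Several tree-log paths above now downgrade -ENOSPC to a forced full commit
instead of BUG(); the recurring shape, as a hedged sketch (the update call
is a stand-in, not a real helper):

        ret = update_the_log_tree(trans, log);  /* stand-in operation */
        if (ret == -ENOSPC) {
                /* abandon the log for this transaction; the next fsync
                 * falls back to a full transaction commit */
                root->fs_info->last_trans_log_full_commit = trans->transid;
                ret = 0;
        }
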
@@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
        else
                key.type = BTRFS_DIR_LOG_INDEX_KEY;
        ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
-       BUG_ON(ret);
+       if (ret)
+               return ret;
 
        item = btrfs_item_ptr(path->nodes[0], path->slots[0],
                              struct btrfs_dir_log_item);
@@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src;
+       int err = 0;
        int ret;
        int i;
        int nritems;
@@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                        ret = overwrite_item(trans, log, dst_path,
                                             path->nodes[0], path->slots[0],
                                             &tmp);
+                       if (ret) {
+                               err = ret;
+                               goto done;
+                       }
                }
        }
        btrfs_release_path(root, path);
@@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                                goto done;
                        ret = overwrite_item(trans, log, dst_path, src, i,
                                             &min_key);
-                       BUG_ON(ret);
+                       if (ret) {
+                               err = ret;
+                               goto done;
+                       }
                }
                path->slots[0] = nritems;
 
@@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                        ret = overwrite_item(trans, log, dst_path,
                                             path->nodes[0], path->slots[0],
                                             &tmp);
-
-                       BUG_ON(ret);
-                       last_offset = tmp.offset;
+                       if (ret)
+                               err = ret;
+                       else
+                               last_offset = tmp.offset;
                        goto done;
                }
        }
 done:
-       *last_offset_ret = last_offset;
        btrfs_release_path(root, path);
        btrfs_release_path(log, dst_path);
 
-       /* insert the log range keys to indicate where the log is valid */
-       ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino,
-                                first_offset, last_offset);
-       BUG_ON(ret);
-       return 0;
+       if (err == 0) {
+               *last_offset_ret = last_offset;
+               /*
+                * insert the log range keys to indicate where the log
+                * is valid
+                */
+               ret = insert_dir_log_key(trans, log, path, key_type,
+                                        inode->i_ino, first_offset,
+                                        last_offset);
+               if (ret)
+                       err = ret;
+       }
+       return err;
 }
 
 /*
@@ -2501,7 +2529,8 @@ again:
                ret = log_dir_items(trans, root, inode, path,
                                    dst_path, key_type, min_key,
                                    &max_key);
-               BUG_ON(ret);
+               if (ret)
+                       return ret;
                if (max_key == (u64)-1)
                        break;
                min_key = max_key + 1;
@@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
 
        while (1) {
                ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
-
-               if (ret != 1)
+               BUG_ON(ret == 0);
+               if (ret < 0)
                        break;
 
                if (path->slots[0] == 0)
@@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
                btrfs_release_path(log, path);
        }
        btrfs_release_path(log, path);
-       return 0;
+       return ret;
 }
 
 static noinline int copy_items(struct btrfs_trans_handle *trans,
@@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        }
        ret = btrfs_insert_empty_items(trans, log, dst_path,
                                       ins_keys, ins_sizes, nr);
-       BUG_ON(ret);
+       if (ret) {
+               kfree(ins_data);
+               return ret;
+       }
 
        for (i = 0; i < nr; i++, dst_path->slots[0]++) {
                dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
@@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
         * we have to do this after the loop above to avoid changing the
         * log tree while trying to change the log tree.
         */
+       ret = 0;
        while (!list_empty(&ordered_sums)) {
                struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
                                                   struct btrfs_ordered_sum,
                                                   list);
-               ret = btrfs_csum_file_blocks(trans, log, sums);
-               BUG_ON(ret);
+               if (!ret)
+                       ret = btrfs_csum_file_blocks(trans, log, sums);
                list_del(&sums->list);
                kfree(sums);
        }
-       return 0;
+       return ret;
 }
 
 /* log a single inode in the tree log.
@@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src = NULL;
        u32 size;
+       int err = 0;
        int ret;
        int nritems;
        int ins_start_slot = 0;
@@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        } else {
                ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
        }
-       BUG_ON(ret);
+       if (ret) {
+               err = ret;
+               goto out_unlock;
+       }
        path->keep_locks = 1;
 
        while (1) {
@@ -2768,7 +2805,10 @@ again:
 
                ret = copy_items(trans, log, dst_path, src, ins_start_slot,
                                 ins_nr, inode_only);
-               BUG_ON(ret);
+               if (ret) {
+                       err = ret;
+                       goto out_unlock;
+               }
                ins_nr = 1;
                ins_start_slot = path->slots[0];
 next_slot:
@@ -2784,7 +2824,10 @@ next_slot:
                        ret = copy_items(trans, log, dst_path, src,
                                         ins_start_slot,
                                         ins_nr, inode_only);
-                       BUG_ON(ret);
+                       if (ret) {
+                               err = ret;
+                               goto out_unlock;
+                       }
                        ins_nr = 0;
                }
                btrfs_release_path(root, path);
@@ -2802,7 +2845,10 @@ next_slot:
                ret = copy_items(trans, log, dst_path, src,
                                 ins_start_slot,
                                 ins_nr, inode_only);
-               BUG_ON(ret);
+               if (ret) {
+                       err = ret;
+                       goto out_unlock;
+               }
                ins_nr = 0;
        }
        WARN_ON(ins_nr);
@@ -2810,14 +2856,18 @@ next_slot:
                btrfs_release_path(root, path);
                btrfs_release_path(log, dst_path);
                ret = log_directory_changes(trans, root, inode, path, dst_path);
-               BUG_ON(ret);
+               if (ret) {
+                       err = ret;
+                       goto out_unlock;
+               }
        }
        BTRFS_I(inode)->logged_trans = trans->transid;
+out_unlock:
        mutex_unlock(&BTRFS_I(inode)->log_mutex);
 
        btrfs_free_path(path);
        btrfs_free_path(dst_path);
-       return 0;
+       return err;
 }
 
 /*
@@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_no_trans;
        }
 
-       start_log_trans(trans, root);
+       ret = start_log_trans(trans, root);
+       if (ret)
+               goto end_trans;
 
        ret = btrfs_log_inode(trans, root, inode, inode_only);
-       BUG_ON(ret);
+       if (ret)
+               goto end_trans;
 
        /*
         * for regular files, if its inode is already on disk, we don't
@@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
         */
        if (S_ISREG(inode->i_mode) &&
            BTRFS_I(inode)->generation <= last_committed &&
-           BTRFS_I(inode)->last_unlink_trans <= last_committed)
-                       goto no_parent;
+           BTRFS_I(inode)->last_unlink_trans <= last_committed) {
+               ret = 0;
+               goto end_trans;
+       }
 
        inode_only = LOG_INODE_EXISTS;
        while (1) {
@@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                if (BTRFS_I(inode)->generation >
                    root->fs_info->last_trans_committed) {
                        ret = btrfs_log_inode(trans, root, inode, inode_only);
-                       BUG_ON(ret);
+                       if (ret)
+                               goto end_trans;
                }
                if (IS_ROOT(parent))
                        break;
 
                parent = parent->d_parent;
        }
-no_parent:
        ret = 0;
+end_trans:
+       if (ret < 0) {
+               BUG_ON(ret != -ENOSPC);
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+               ret = 1;
+       }
        btrfs_end_log_trans(root);
 end_no_trans:
        return ret;
@@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       trans = btrfs_start_transaction(fs_info->tree_root, 1);
+       trans = btrfs_start_transaction(fs_info->tree_root, 0);
 
        wc.trans = trans;
        wc.pin = 1;
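
The tree-log hunks above all apply one pattern: replace BUG_ON(ret) with "record the first error in err, still run the cleanup, return err". A small runnable user-space sketch of that shape, where step1()/step2() are hypothetical stand-ins for the overwrite_item()/insert_dir_log_key() calls:

#include <errno.h>
#include <stdio.h>

static int step1(void) { return 0; }
static int step2(void) { return -ENOSPC; }      /* pretend allocation failed */

static int log_something(void)
{
        int err = 0;
        int ret;

        ret = step1();
        if (ret) {
                err = ret;
                goto done;
        }
        ret = step2();
        if (ret)
                err = ret;                      /* remember the first error */
done:
        /* release paths / drop locks here, as the real code does */
        return err;
}

int main(void)
{
        printf("log_something() = %d\n", log_something());
        return 0;
}
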
index 0776eac..3dfae84 100644 (file)
@@ -25,6 +25,8 @@
 int btrfs_sync_log(struct btrfs_trans_handle *trans,
                   struct btrfs_root *root);
 int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
+int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info);
 int btrfs_recover_log_trees(struct btrfs_root *tree_root);
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct dentry *dentry);
index 8db7b14..d6e3af8 100644 (file)
@@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
        if (!path)
                return -ENOMEM;
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;
@@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                goto error;
        }
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
        lock_chunks(root);
 
        device->barriers = 1;
@@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 
        /* step one, relocate all the extents inside this chunk */
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
-       BUG_ON(ret);
+       if (ret)
+               return ret;
 
-       trans = btrfs_start_transaction(root, 1);
+       trans = btrfs_start_transaction(root, 0);
        BUG_ON(!trans);
 
        lock_chunks(root);
@@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
                        break;
                BUG_ON(ret);
 
-               trans = btrfs_start_transaction(dev_root, 1);
+               trans = btrfs_start_transaction(dev_root, 0);
                BUG_ON(!trans);
 
                ret = btrfs_grow_device(trans, device, old_size);
@@ -2094,11 +2095,7 @@ again:
        }
 
        /* Shrinking succeeded, else we would be at "done". */
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
-               goto done;
-       }
+       trans = btrfs_start_transaction(root, 0);
        lock_chunks(root);
 
        device->disk_total_bytes = new_size;
index 59acd3e..88ecbb2 100644 (file)
@@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
        if (trans)
                return do_setxattr(trans, inode, name, value, size, flags);
 
-       ret = btrfs_reserve_metadata_space(root, 2);
-       if (ret)
-               return ret;
+       trans = btrfs_start_transaction(root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
-               goto out;
-       }
        btrfs_set_trans_block_group(trans, inode);
 
        ret = do_setxattr(trans, inode, name, value, size, flags);
@@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 out:
        btrfs_end_transaction_throttle(trans, root);
-       btrfs_unreserve_metadata_space(root, 2);
        return ret;
 }
 
index 0544873..f0b391c 100644 (file)
@@ -568,6 +568,79 @@ out:
        return ret;
 }
 
+/* A write operation does a read from user space and vice versa */
+#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
+
+ssize_t compat_rw_copy_check_uvector(int type,
+               const struct compat_iovec __user *uvector, unsigned long nr_segs,
+               unsigned long fast_segs, struct iovec *fast_pointer,
+               struct iovec **ret_pointer)
+{
+       compat_ssize_t tot_len;
+       struct iovec *iov = *ret_pointer = fast_pointer;
+       ssize_t ret = 0;
+       int seg;
+
+       /*
+        * SuS says "The readv() function *may* fail if the iovcnt argument
+        * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+        * traditionally returned zero for zero segments, so...
+        */
+       if (nr_segs == 0)
+               goto out;
+
+       ret = -EINVAL;
+       if (nr_segs > UIO_MAXIOV || nr_segs < 0)
+               goto out;
+       if (nr_segs > fast_segs) {
+               ret = -ENOMEM;
+               iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+               if (iov == NULL) {
+                       *ret_pointer = fast_pointer;
+                       goto out;
+               }
+       }
+       *ret_pointer = iov;
+
+       /*
+        * Single unix specification:
+        * We should return -EINVAL if an element length is negative or does
+        * not fit in an ssize_t, and the total length must also fit in an
+        * ssize_t.
+        *
+        * Be careful here because iov_len is a size_t not an ssize_t
+        */
+       tot_len = 0;
+       ret = -EINVAL;
+       for (seg = 0; seg < nr_segs; seg++) {
+               compat_ssize_t tmp = tot_len;
+               compat_uptr_t buf;
+               compat_ssize_t len;
+
+               if (__get_user(len, &uvector->iov_len) ||
+                  __get_user(buf, &uvector->iov_base)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+               if (len < 0)    /* size_t not fitting in compat_ssize_t .. */
+                       goto out;
+               tot_len += len;
+               if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
+                       goto out;
+               if (!access_ok(vrfy_dir(type), buf, len)) {
+                       ret = -EFAULT;
+                       goto out;
+               }
+               iov->iov_base = compat_ptr(buf);
+               iov->iov_len = (compat_size_t) len;
+               uvector++;
+               iov++;
+       }
+       ret = tot_len;
+
+out:
+       return ret;
+}
+
 static inline long
 copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
 {
@@ -600,7 +673,7 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
        iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
        ret = copy_iocb(nr, iocb, iocb64);
        if (!ret)
-               ret = sys_io_submit(ctx_id, nr, iocb64);
+               ret = do_io_submit(ctx_id, nr, iocb64, 1);
        return ret;
 }
 
@@ -1077,70 +1150,21 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 {
        compat_ssize_t tot_len;
        struct iovec iovstack[UIO_FASTIOV];
-       struct iovec *iov=iovstack, *vector;
+       struct iovec *iov;
        ssize_t ret;
-       int seg;
        io_fn_t fn;
        iov_fn_t fnv;
 
-       /*
-        * SuS says "The readv() function *may* fail if the iovcnt argument
-        * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-        * traditionally returned zero for zero segments, so...
-        */
-       ret = 0;
-       if (nr_segs == 0)
-               goto out;
-
-       /*
-        * First get the "struct iovec" from user memory and
-        * verify all the pointers
-        */
        ret = -EINVAL;
-       if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0))
-               goto out;
        if (!file->f_op)
                goto out;
-       if (nr_segs > UIO_FASTIOV) {
-               ret = -ENOMEM;
-               iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
-               if (!iov)
-                       goto out;
-       }
+
        ret = -EFAULT;
        if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
                goto out;
 
-       /*
-        * Single unix specification:
-        * We should -EINVAL if an element length is not >= 0 and fitting an
-        * ssize_t.  The total length is fitting an ssize_t
-        *
-        * Be careful here because iov_len is a size_t not an ssize_t
-        */
-       tot_len = 0;
-       vector = iov;
-       ret = -EINVAL;
-       for (seg = 0 ; seg < nr_segs; seg++) {
-               compat_ssize_t tmp = tot_len;
-               compat_ssize_t len;
-               compat_uptr_t buf;
-
-               if (__get_user(len, &uvector->iov_len) ||
-                   __get_user(buf, &uvector->iov_base)) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-               if (len < 0)    /* size_t not fitting an compat_ssize_t .. */
-                       goto out;
-               tot_len += len;
-               if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
-                       goto out;
-               vector->iov_base = compat_ptr(buf);
-               vector->iov_len = (compat_size_t) len;
-               uvector++;
-               vector++;
-       }
+       tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs,
+                                              UIO_FASTIOV, iovstack, &iov);
        if (tot_len == 0) {
                ret = 0;
                goto out;
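
The trickiest part of the validation that compat_rw_copy_check_uvector() now centralizes is rejecting a total length that overflows. A runnable user-space analogue (check_iovec() is a simplified stand-in; it tests before adding instead of relying on the wraparound the kernel code tolerates):

#include <limits.h>
#include <stdio.h>
#include <sys/uio.h>

/* Validate an iovec array the way compat_rw_copy_check_uvector does:
 * reject negative lengths and a total that would overflow. */
static long check_iovec(const struct iovec *iov, int nr)
{
        long tot_len = 0;
        int i;

        for (i = 0; i < nr; i++) {
                long len = (long)iov[i].iov_len;

                if (len < 0)
                        return -1;              /* -EINVAL in the kernel */
                if (len > LONG_MAX - tot_len)
                        return -1;              /* total would overflow */
                tot_len += len;
        }
        return tot_len;
}

int main(void)
{
        char a[16], b[32];
        struct iovec v[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

        printf("total = %ld\n", check_iovec(v, 2));     /* prints 48 */
        return 0;
}
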
index e82adc2..da111aa 100644 (file)
@@ -82,6 +82,8 @@ struct dio {
        int reap_counter;               /* rate limit reaping */
        get_block_t *get_block;         /* block mapping function */
        dio_iodone_t *end_io;           /* IO completion function */
+       dio_submit_t *submit_io;        /* IO submission function */
+       loff_t logical_offset_in_bio;   /* current first logical byte in bio */
        sector_t final_block_in_bio;    /* current final block in bio + 1 */
        sector_t next_block_for_io;     /* next block to be put under IO,
                                           in dio_blocks units */
@@ -96,6 +98,7 @@ struct dio {
        unsigned cur_page_offset;       /* Offset into it, in bytes */
        unsigned cur_page_len;          /* Nr of bytes at cur_page_offset */
        sector_t cur_page_block;        /* Where it starts */
+       loff_t cur_page_fs_offset;      /* Offset in file */
 
        /* BIO completion state */
        spinlock_t bio_lock;            /* protects BIO fields below */
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error)
        spin_unlock_irqrestore(&dio->bio_lock, flags);
 }
 
+/**
+ * dio_end_io - handle the end io action for the given bio
+ * @bio: The direct io bio that's being completed
+ * @error: Error if there was one
+ *
+ * This is meant to be called by any filesystem that uses its own dio_submit_t
+ * so that the DIO specific endio actions are dealt with after the filesystem
+ * has done its completion work.
+ */
+void dio_end_io(struct bio *bio, int error)
+{
+       struct dio *dio = bio->bi_private;
+
+       if (dio->is_async)
+               dio_bio_end_aio(bio, error);
+       else
+               dio_bio_end_io(bio, error);
+}
+EXPORT_SYMBOL_GPL(dio_end_io);
+
 static int
 dio_bio_alloc(struct dio *dio, struct block_device *bdev,
                sector_t first_sector, int nr_vecs)
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
                bio->bi_end_io = dio_bio_end_io;
 
        dio->bio = bio;
+       dio->logical_offset_in_bio = dio->cur_page_fs_offset;
        return 0;
 }
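
To make the hook concrete, a hedged kernel-style sketch (it will not build outside a kernel tree) of how a filesystem could plug in: myfs_submit_dio() matches the argument list the new dio->submit_io call site uses, and myfs_end_dio() runs the filesystem's own completion work before handing back to the generic path via dio_end_io(). Both myfs_* names are hypothetical.

/* Hypothetical dio_submit_t implementation.  dio_bio_alloc() has
 * already set bio->bi_private to the struct dio, which dio_end_io()
 * relies on, so only bi_end_io is swapped here. */
static void myfs_end_dio(struct bio *bio, int error)
{
        /* filesystem-specific completion work would go here */
        dio_end_io(bio, error);         /* resume the generic DIO endio */
}

static void myfs_submit_dio(int rw, struct bio *bio, struct inode *inode,
                            loff_t file_offset)
{
        bio->bi_end_io = myfs_end_dio;
        submit_bio(rw, bio);
}
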
 
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio)
        if (dio->is_async && dio->rw == READ)
                bio_set_pages_dirty(bio);
 
-       submit_bio(dio->rw, bio);
+       if (dio->submit_io)
+               dio->submit_io(dio->rw, bio, dio->inode,
+                              dio->logical_offset_in_bio);
+       else
+               submit_bio(dio->rw, bio);
 
        dio->bio = NULL;
        dio->boundary = 0;
+       dio->logical_offset_in_bio = 0;
 }
 
 /*
@@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio)
        int ret = 0;
 
        if (dio->bio) {
+               loff_t cur_offset = dio->block_in_file << dio->blkbits;
+               loff_t bio_next_offset = dio->logical_offset_in_bio +
+                       dio->bio->bi_size;
+
                /*
-                * See whether this new request is contiguous with the old
+                * See whether this new request is contiguous with the old.
+                *
+                * Btrfs cannot handle having logically non-contiguous requests
+                * submitted.  For example, if you have
+                *
+                * Logical:  [0-4095][HOLE][8192-12287]
+                * Physical: [0-4095]      [4096-8191]
+                *
+                * We cannot submit those pages together as one BIO.  So if our
+                * current logical offset in the file does not equal what would
+                * be the next logical offset in the bio, submit the bio we
+                * have.
                 */
-               if (dio->final_block_in_bio != dio->cur_page_block)
+               if (dio->final_block_in_bio != dio->cur_page_block ||
+                   cur_offset != bio_next_offset)
                        dio_bio_submit(dio);
                /*
                 * Submit now if the underlying fs is about to perform a
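
To see why the new (cur_offset != bio_next_offset) test fires for the layout in the comment above, here is a small runnable arithmetic check using the same numbers (4 KiB blocks; variable names mirror the struct dio fields):

#include <stdio.h>

int main(void)
{
        unsigned blkbits = 12;                       /* 4096-byte blocks */
        long long logical_offset_in_bio = 0;         /* first byte in the bio */
        long long bi_size = 4096;                    /* bytes already in the bio */
        long long block_in_file = 8192 >> blkbits;   /* next logical block: 2 */

        long long cur_offset = block_in_file << blkbits;             /* 8192 */
        long long bio_next_offset = logical_offset_in_bio + bi_size; /* 4096 */

        /* the physical blocks are adjacent, but the logical offsets are
         * not, so the bio must be submitted before adding the new page */
        printf("submit first? %s\n",
               cur_offset != bio_next_offset ? "yes" : "no");        /* yes */
        return 0;
}
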
@@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page,
        dio->cur_page_offset = offset;
        dio->cur_page_len = len;
        dio->cur_page_block = blocknr;
+       dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits;
 out:
        return ret;
 }
@@ -935,7 +981,7 @@ static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 
        const struct iovec *iov, loff_t offset, unsigned long nr_segs, 
        unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
-       struct dio *dio)
+       dio_submit_t submit_io, struct dio *dio)
 {
        unsigned long user_addr; 
        unsigned long flags;
@@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
        dio->get_block = get_block;
        dio->end_io = end_io;
+       dio->submit_io = submit_io;
        dio->final_block_in_bio = -1;
        dio->next_block_for_io = -1;
 
@@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
                }
        } /* end iovec loop */
 
-       if (ret == -ENOTBLK && (rw & WRITE)) {
+       if (ret == -ENOTBLK) {
                /*
                 * The remaining part of the request will be
                 * be handled by buffered I/O when we return
@@ -1110,7 +1157,7 @@ ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct block_device *bdev, const struct iovec *iov, loff_t offset, 
        unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-       int flags)
+       dio_submit_t submit_io, int flags)
 {
        int seg;
        size_t size;
@@ -1197,7 +1244,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                (end > i_size_read(inode)));
 
        retval = direct_io_worker(rw, iocb, inode, iov, offset,
-                               nr_segs, blkbits, get_block, end_io, dio);
+                               nr_segs, blkbits, get_block, end_io,
+                               submit_io, dio);
 
        /*
         * In case of error extending write may have instantiated a few
index 9badbc0..e19de6a 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -768,7 +768,6 @@ static int de_thread(struct task_struct *tsk)
        struct signal_struct *sig = tsk->signal;
        struct sighand_struct *oldsighand = tsk->sighand;
        spinlock_t *lock = &oldsighand->siglock;
-       int count;
 
        if (thread_group_empty(tsk))
                goto no_thread_group;
@@ -785,13 +784,13 @@ static int de_thread(struct task_struct *tsk)
                spin_unlock_irq(lock);
                return -EAGAIN;
        }
+
        sig->group_exit_task = tsk;
-       zap_other_threads(tsk);
+       sig->notify_count = zap_other_threads(tsk);
+       if (!thread_group_leader(tsk))
+               sig->notify_count--;
 
-       /* Account for the thread group leader hanging around: */
-       count = thread_group_leader(tsk) ? 1 : 2;
-       sig->notify_count = count;
-       while (atomic_read(&sig->count) > count) {
+       while (sig->notify_count) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
                schedule();
@@ -1662,12 +1661,15 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
        struct completion *vfork_done;
-       int core_waiters;
+       int core_waiters = -EBUSY;
 
        init_completion(&core_state->startup);
        core_state->dumper.task = tsk;
        core_state->dumper.next = NULL;
-       core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+
+       down_write(&mm->mmap_sem);
+       if (!mm->core_state)
+               core_waiters = zap_threads(tsk, mm, core_state, exit_code);
        up_write(&mm->mmap_sem);
 
        if (unlikely(core_waiters < 0))
@@ -1787,21 +1789,61 @@ static void wait_for_dump_helpers(struct file *file)
 }
 
 
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace.  Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process.  Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1.  This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info)
+{
+       struct file *rp, *wp;
+       struct fdtable *fdt;
+       struct coredump_params *cp = (struct coredump_params *)info->data;
+       struct files_struct *cf = current->files;
+
+       wp = create_write_pipe(0);
+       if (IS_ERR(wp))
+               return PTR_ERR(wp);
+
+       rp = create_read_pipe(wp, 0);
+       if (IS_ERR(rp)) {
+               free_write_pipe(wp);
+               return PTR_ERR(rp);
+       }
+
+       cp->file = wp;
+
+       sys_close(0);
+       fd_install(0, rp);
+       spin_lock(&cf->file_lock);
+       fdt = files_fdtable(cf);
+       FD_SET(0, fdt->open_fds);
+       FD_CLR(0, fdt->close_on_exec);
+       spin_unlock(&cf->file_lock);
+
+       /* and disallow core files too */
+       current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+       return 0;
+}
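
Since umh_pipe_setup() installs the read end of the pipe as fd 0 before the helper runs, a collector only has to drain stdin. A minimal, runnable user-space sketch, assuming a pipe-style core_pattern such as |/usr/local/bin/core-collector %p (path and argument are illustrative):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        char path[64], buf[4096];
        ssize_t n;
        int fd;

        snprintf(path, sizeof(path), "/tmp/core.%s",
                 argc > 1 ? argv[1] : "unknown");
        fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
        if (fd < 0)
                return 1;
        while ((n = read(0, buf, sizeof(buf))) > 0)     /* fd 0 is the pipe */
                if (write(fd, buf, n) != n)
                        break;
        close(fd);
        return 0;
}
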
+
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
        struct core_state core_state;
        char corename[CORENAME_MAX_SIZE + 1];
        struct mm_struct *mm = current->mm;
        struct linux_binfmt * binfmt;
-       struct inode * inode;
        const struct cred *old_cred;
        struct cred *cred;
        int retval = 0;
        int flag = 0;
-       int ispipe = 0;
-       char **helper_argv = NULL;
-       int helper_argc = 0;
-       int dump_count = 0;
+       int ispipe;
        static atomic_t core_dump_count = ATOMIC_INIT(0);
        struct coredump_params cprm = {
                .signr = signr,
@@ -1820,23 +1862,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        binfmt = mm->binfmt;
        if (!binfmt || !binfmt->core_dump)
                goto fail;
-
-       cred = prepare_creds();
-       if (!cred) {
-               retval = -ENOMEM;
+       if (!__get_dumpable(cprm.mm_flags))
                goto fail;
-       }
 
-       down_write(&mm->mmap_sem);
-       /*
-        * If another thread got here first, or we are not dumpable, bail out.
-        */
-       if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
-               up_write(&mm->mmap_sem);
-               put_cred(cred);
+       cred = prepare_creds();
+       if (!cred)
                goto fail;
-       }
-
        /*
         *      We cannot trust fsuid as being the "true" uid of the
         *      process nor do we know its entire history. We only know it
@@ -1849,10 +1880,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        }
 
        retval = coredump_wait(exit_code, &core_state);
-       if (retval < 0) {
-               put_cred(cred);
-               goto fail;
-       }
+       if (retval < 0)
+               goto fail_creds;
 
        old_cred = override_creds(cred);
 
@@ -1870,19 +1899,19 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        ispipe = format_corename(corename, signr);
        unlock_kernel();
 
-       if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
-               goto fail_unlock;
-
        if (ispipe) {
-               if (cprm.limit == 0) {
+               int dump_count;
+               char **helper_argv;
+
+               if (cprm.limit == 1) {
                        /*
                         * Normally core limits are irrelevant to pipes, since
                         * we're not writing to the file system, but we use
-                        * cprm.limit of 0 here as a speacial value. Any
-                        * non-zero limit gets set to RLIM_INFINITY below, but
+                        * cprm.limit of 1 here as a special value. Any
+                        * non-1 limit gets set to RLIM_INFINITY below, but
                         * a limit of 0 skips the dump.  This is a consistent
                         * way to catch recursive crashes.  We can still crash
-                        * if the core_pattern binary sets RLIM_CORE =  !0
+                        * if the core_pattern binary sets RLIM_CORE =  !1
                         * but it runs as root, and can do lots of stupid things
                         * Note that we use task_tgid_vnr here to grab the pid
                         * of the process group leader.  That way we get the
@@ -1890,11 +1919,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
                         * core_pattern process dies.
                         */
                        printk(KERN_WARNING
-                               "Process %d(%s) has RLIMIT_CORE set to 0\n",
+                               "Process %d(%s) has RLIMIT_CORE set to 1\n",
                                task_tgid_vnr(current), current->comm);
                        printk(KERN_WARNING "Aborting core\n");
                        goto fail_unlock;
                }
+               cprm.limit = RLIM_INFINITY;
 
                dump_count = atomic_inc_return(&core_dump_count);
                if (core_pipe_limit && (core_pipe_limit < dump_count)) {
@@ -1904,71 +1934,74 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
                        goto fail_dropcount;
                }
 
-               helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
+               helper_argv = argv_split(GFP_KERNEL, corename+1, NULL);
                if (!helper_argv) {
                        printk(KERN_WARNING "%s failed to allocate memory\n",
                               __func__);
                        goto fail_dropcount;
                }
 
-               cprm.limit = RLIM_INFINITY;
-
-               /* SIGPIPE can happen, but it's just never processed */
-               if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-                               &cprm.file)) {
+               retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+                                       NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+                                       NULL, &cprm);
+               argv_free(helper_argv);
+               if (retval) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
-                       goto fail_dropcount;
+                       goto close_fail;
                }
-       } else
+       } else {
+               struct inode *inode;
+
+               if (cprm.limit < binfmt->min_coredump)
+                       goto fail_unlock;
+
                cprm.file = filp_open(corename,
                                 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
                                 0600);
-       if (IS_ERR(cprm.file))
-               goto fail_dropcount;
-       inode = cprm.file->f_path.dentry->d_inode;
-       if (inode->i_nlink > 1)
-               goto close_fail;        /* multiple links - don't dump */
-       if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
-               goto close_fail;
-
-       /* AK: actually i see no reason to not allow this for named pipes etc.,
-          but keep the previous behaviour for now. */
-       if (!ispipe && !S_ISREG(inode->i_mode))
-               goto close_fail;
-       /*
-        * Dont allow local users get cute and trick others to coredump
-        * into their pre-created files:
-        * Note, this is not relevant for pipes
-        */
-       if (!ispipe && (inode->i_uid != current_fsuid()))
-               goto close_fail;
-       if (!cprm.file->f_op)
-               goto close_fail;
-       if (!cprm.file->f_op->write)
-               goto close_fail;
-       if (!ispipe &&
-           do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
-               goto close_fail;
+               if (IS_ERR(cprm.file))
+                       goto fail_unlock;
 
-       retval = binfmt->core_dump(&cprm);
+               inode = cprm.file->f_path.dentry->d_inode;
+               if (inode->i_nlink > 1)
+                       goto close_fail;
+               if (d_unhashed(cprm.file->f_path.dentry))
+                       goto close_fail;
+               /*
+                * AK: actually i see no reason to not allow this for named
+                * pipes etc, but keep the previous behaviour for now.
+                */
+               if (!S_ISREG(inode->i_mode))
+                       goto close_fail;
+               /*
+                * Dont allow local users get cute and trick others to coredump
+                * into their pre-created files.
+                */
+               if (inode->i_uid != current_fsuid())
+                       goto close_fail;
+               if (!cprm.file->f_op || !cprm.file->f_op->write)
+                       goto close_fail;
+               if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+                       goto close_fail;
+       }
 
+       retval = binfmt->core_dump(&cprm);
        if (retval)
                current->signal->group_exit_code |= 0x80;
-close_fail:
+
        if (ispipe && core_pipe_limit)
                wait_for_dump_helpers(cprm.file);
-       filp_close(cprm.file, NULL);
+close_fail:
+       if (cprm.file)
+               filp_close(cprm.file, NULL);
 fail_dropcount:
-       if (dump_count)
+       if (ispipe)
                atomic_dec(&core_dump_count);
 fail_unlock:
-       if (helper_argv)
-               argv_free(helper_argv);
-
+       coredump_finish(mm);
        revert_creds(old_cred);
+fail_creds:
        put_cred(cred);
-       coredump_finish(mm);
 fail:
        return;
 }
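
The rework above also straightens do_coredump()'s unwind path into nested labels, each undoing exactly one acquisition (fail_creds drops the creds that fail never took). A runnable user-space sketch of the shape, with prepare()/wait_step() as hypothetical stand-ins for prepare_creds()/coredump_wait():

#include <stdio.h>
#include <stdlib.h>

static void *prepare(void)  { return malloc(1); }
static int wait_step(void)  { return -1; }      /* pretend coredump_wait fails */

static void run(void)
{
        void *cred = prepare();
        if (!cred)
                goto fail;                      /* nothing to undo yet */
        if (wait_step() < 0)
                goto fail_creds;                /* undo only the creds */
        puts("dumping...");
fail_creds:
        free(cred);
fail:
        return;
}

int main(void) { run(); return 0; }
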
index d2f37a5..95b7594 100644 (file)
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
        ret = ext4_mb_new_blocks(handle, &ar, errp);
        if (count)
                *count = ar.len;
-
        /*
-        * Account for the allocated meta blocks
+        * Account for the allocated meta blocks.  We will never
+        * fail with EDQUOT for metadata, but we do account for it.
         */
        if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
                spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
                EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+               dquot_alloc_block_nofail(inode, ar.len);
        }
        return ret;
 }
index 538c486..5b6973f 100644 (file)
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi,
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = &(*n)->rb_right;
                else {
-                       if (start_blk + count > (entry->start_blk + 
+                       if (start_blk + count > (entry->start_blk +
                                                 entry->count))
-                               entry->count = (start_blk + count - 
+                               entry->count = (start_blk + count -
                                                entry->start_blk);
                        new_node = *n;
                        new_entry = rb_entry(new_node, struct ext4_system_zone,
index 86cb6d8..ea5e6cb 100644 (file)
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
                error_msg = "inode out of bounds";
 
        if (error_msg != NULL)
-               __ext4_error(dir->i_sb, function,
-                       "bad entry in directory #%lu: %s - block=%llu"
+               ext4_error_inode(function, dir,
+                       "bad entry in directory: %s - block=%llu"
                        "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
-                       dir->i_ino, error_msg, 
-                       (unsigned long long) bh->b_blocknr,     
+                       error_msg, (unsigned long long) bh->b_blocknr,
                        (unsigned) (offset%bh->b_size), offset,
                        le32_to_cpu(de->inode),
                        rlen, de->name_len);
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 
        if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                                    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
-           ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) ||
+           ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
             ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
                err = ext4_dx_readdir(filp, dirent, filldir);
                if (err != ERR_BAD_DX_DIR) {
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp,
                 * We don't set the inode dirty flag since it's not
                 * critical that it get flushed back to the disk.
                 */
-               EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX);
        }
        stored = 0;
        offset = filp->f_pos & (sb->s_blocksize - 1);
 
        while (!error && !stored && filp->f_pos < inode->i_size) {
-               ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
-               struct buffer_head map_bh;
+               struct ext4_map_blocks map;
                struct buffer_head *bh = NULL;
 
-               map_bh.b_state = 0;
-               err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
+               map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
+               map.m_len = 1;
+               err = ext4_map_blocks(NULL, inode, &map, 0);
                if (err > 0) {
-                       pgoff_t index = map_bh.b_blocknr >>
+                       pgoff_t index = map.m_pblk >>
                                        (PAGE_CACHE_SHIFT - inode->i_blkbits);
                        if (!ra_has_index(&filp->f_ra, index))
                                page_cache_sync_readahead(
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp,
                                        &filp->f_ra, filp,
                                        index, 1);
                        filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
-                       bh = ext4_bread(NULL, inode, blk, 0, &err);
+                       bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
                }
 
                /*
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp,
                 */
                if (!bh) {
                        if (!dir_has_error) {
-                               ext4_error(sb, "directory #%lu "
+                               EXT4_ERROR_INODE(inode, "directory "
                                           "contains a hole at offset %Lu",
-                                          inode->i_ino,
                                           (unsigned long long) filp->f_pos);
                                dir_has_error = 1;
                        }
index bf938cf..60bd310 100644 (file)
@@ -29,6 +29,9 @@
 #include <linux/wait.h>
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
+#ifdef __KERNEL__
+#include <linux/compat.h>
+#endif
 
 /*
  * The fourth extended filesystem constants/structures
 #endif
 
 #define EXT4_ERROR_INODE(inode, fmt, a...) \
-       ext4_error_inode(__func__, (inode), (fmt), ## a);
+       ext4_error_inode(__func__, (inode), (fmt), ## a)
 
 #define EXT4_ERROR_FILE(file, fmt, a...)       \
-       ext4_error_file(__func__, (file), (fmt), ## a);
+       ext4_error_file(__func__, (file), (fmt), ## a)
 
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t;
 typedef unsigned int ext4_group_t;
 
 /*
- * Flags used in mballoc's allocation_context flags field.  
+ * Flags used in mballoc's allocation_context flags field.
  *
  * Also used to show what's going on for debugging purposes when the
  * flag field is exported via the traceport interface
@@ -126,6 +129,29 @@ struct ext4_allocation_request {
 };
 
 /*
+ * Logical to physical block mapping, used by ext4_map_blocks()
+ *
+ * This structure is used to pass requests into ext4_map_blocks() as
+ * well as to store the information returned by ext4_map_blocks().  It
+ * takes less room on the stack than a struct buffer_head.
+ */
+#define EXT4_MAP_NEW           (1 << BH_New)
+#define EXT4_MAP_MAPPED                (1 << BH_Mapped)
+#define EXT4_MAP_UNWRITTEN     (1 << BH_Unwritten)
+#define EXT4_MAP_BOUNDARY      (1 << BH_Boundary)
+#define EXT4_MAP_UNINIT                (1 << BH_Uninit)
+#define EXT4_MAP_FLAGS         (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
+                                EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
+                                EXT4_MAP_UNINIT)
+
+struct ext4_map_blocks {
+       ext4_fsblk_t m_pblk;
+       ext4_lblk_t m_lblk;
+       unsigned int m_len;
+       unsigned int m_flags;
+};
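
A hedged kernel-style sketch of the calling convention this struct enables (not compilable stand-alone; lookup_one_block() is a hypothetical helper, and a NULL handle means no allocation, mirroring the readdir conversion above):

/* Resolve one logical block of an inode via the new interface. */
static int lookup_one_block(struct inode *inode, ext4_lblk_t lblk)
{
        struct ext4_map_blocks map;
        int ret;

        map.m_lblk = lblk;                      /* logical block to resolve */
        map.m_len = 1;                          /* map a single block */
        ret = ext4_map_blocks(NULL, inode, &map, 0);
        if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
                printk(KERN_DEBUG "lblk %u -> pblk %llu\n",
                       (unsigned)lblk, (unsigned long long)map.m_pblk);
        return ret;
}
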
+
+/*
  * For delayed allocation tracking
  */
 struct mpage_da_data {
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
                return flags & EXT4_OTHER_FLMASK;
 }
 
+/*
+ * Inode flags used for atomic set/get
+ */
+enum {
+       EXT4_INODE_SECRM        = 0,    /* Secure deletion */
+       EXT4_INODE_UNRM         = 1,    /* Undelete */
+       EXT4_INODE_COMPR        = 2,    /* Compress file */
+       EXT4_INODE_SYNC         = 3,    /* Synchronous updates */
+       EXT4_INODE_IMMUTABLE    = 4,    /* Immutable file */
+       EXT4_INODE_APPEND       = 5,    /* writes to file may only append */
+       EXT4_INODE_NODUMP       = 6,    /* do not dump file */
+       EXT4_INODE_NOATIME      = 7,    /* do not update atime */
+/* Reserved for compression usage... */
+       EXT4_INODE_DIRTY        = 8,
+       EXT4_INODE_COMPRBLK     = 9,    /* One or more compressed clusters */
+       EXT4_INODE_NOCOMPR      = 10,   /* Don't compress */
+       EXT4_INODE_ECOMPR       = 11,   /* Compression error */
+/* End compression flags --- maybe not all used */
+       EXT4_INODE_INDEX        = 12,   /* hash-indexed directory */
+       EXT4_INODE_IMAGIC       = 13,   /* AFS directory */
+       EXT4_INODE_JOURNAL_DATA = 14,   /* file data should be journaled */
+       EXT4_INODE_NOTAIL       = 15,   /* file tail should not be merged */
+       EXT4_INODE_DIRSYNC      = 16,   /* dirsync behaviour (directories only) */
+       EXT4_INODE_TOPDIR       = 17,   /* Top of directory hierarchies*/
+       EXT4_INODE_HUGE_FILE    = 18,   /* Set to each huge file */
+       EXT4_INODE_EXTENTS      = 19,   /* Inode uses extents */
+       EXT4_INODE_EA_INODE     = 21,   /* Inode used for large EA */
+       EXT4_INODE_EOFBLOCKS    = 22,   /* Blocks allocated beyond EOF */
+       EXT4_INODE_RESERVED     = 31,   /* reserved for ext4 lib */
+};
+
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \
+       printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \
+               EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); }
+
+/*
+ * Since it's pretty easy to mix up bit numbers and hex values, and we
+ * can't do a compile-time test for ENUM values, we use a run-time
+ * test to make sure that EXT4_XXX_FL is consistent with respect to
+ * EXT4_INODE_XXX.  If all is well the printk and BUG_ON will all drop
+ * out so it won't cost any extra space in the compiled kernel image.
+ * But it's important that these values are the same, since we are
+ * using EXT4_INODE_XXX to test for the flag values, but EXT4_XXX_FL
+ * must be consistent with the values of FS_XXX_FL defined in
+ * include/linux/fs.h and the on-disk values found in ext2, ext3, and
+ * ext4 filesystems, and of course the values defined in e2fsprogs.
+ *
+ * It's not paranoia if Murphy's Law really *is* out to get you.  :-)
+ */
+static inline void ext4_check_flag_values(void)
+{
+       CHECK_FLAG_VALUE(SECRM);
+       CHECK_FLAG_VALUE(UNRM);
+       CHECK_FLAG_VALUE(COMPR);
+       CHECK_FLAG_VALUE(SYNC);
+       CHECK_FLAG_VALUE(IMMUTABLE);
+       CHECK_FLAG_VALUE(APPEND);
+       CHECK_FLAG_VALUE(NODUMP);
+       CHECK_FLAG_VALUE(NOATIME);
+       CHECK_FLAG_VALUE(DIRTY);
+       CHECK_FLAG_VALUE(COMPRBLK);
+       CHECK_FLAG_VALUE(NOCOMPR);
+       CHECK_FLAG_VALUE(ECOMPR);
+       CHECK_FLAG_VALUE(INDEX);
+       CHECK_FLAG_VALUE(IMAGIC);
+       CHECK_FLAG_VALUE(JOURNAL_DATA);
+       CHECK_FLAG_VALUE(NOTAIL);
+       CHECK_FLAG_VALUE(DIRSYNC);
+       CHECK_FLAG_VALUE(TOPDIR);
+       CHECK_FLAG_VALUE(HUGE_FILE);
+       CHECK_FLAG_VALUE(EXTENTS);
+       CHECK_FLAG_VALUE(EA_INODE);
+       CHECK_FLAG_VALUE(EOFBLOCKS);
+       CHECK_FLAG_VALUE(RESERVED);
+}
+
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
        __u32 group;            /* Group number for this data */
@@ -332,6 +435,18 @@ struct ext4_new_group_input {
        __u16 unused;
 };
 
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
+struct compat_ext4_new_group_input {
+       u32 group;
+       compat_u64 block_bitmap;
+       compat_u64 inode_bitmap;
+       compat_u64 inode_table;
+       u32 blocks_count;
+       u16 reserved_blocks;
+       u16 unused;
+};
+#endif
+
 /* The struct ext4_new_group_input in kernel space, with free_blocks_count */
 struct ext4_new_group_data {
        __u32 group;
@@ -355,7 +470,7 @@ struct ext4_new_group_data {
 #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT      (EXT4_GET_BLOCKS_UNINIT_EXT|\
                                                 EXT4_GET_BLOCKS_CREATE)
        /* Caller is from the delayed allocation writeout path,
-          so set the magic i_delalloc_reserve_flag after taking the 
+          so set the magic i_delalloc_reserve_flag after taking the
           inode allocation semaphore for */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE       0x0004
        /* caller is from the direct IO path, request to creation of an
@@ -398,6 +513,7 @@ struct ext4_new_group_data {
 #define EXT4_IOC_ALLOC_DA_BLKS         _IO('f', 12)
 #define EXT4_IOC_MOVE_EXT              _IOWR('f', 15, struct move_extent)
 
+#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
  * ioctl commands in 32 bit emulation
  */
@@ -408,11 +524,13 @@ struct ext4_new_group_data {
 #define EXT4_IOC32_GETRSVSZ            _IOR('f', 5, int)
 #define EXT4_IOC32_SETRSVSZ            _IOW('f', 6, int)
 #define EXT4_IOC32_GROUP_EXTEND                _IOW('f', 7, unsigned int)
+#define EXT4_IOC32_GROUP_ADD           _IOW('f', 8, struct compat_ext4_new_group_input)
 #ifdef CONFIG_JBD2_DEBUG
 #define EXT4_IOC32_WAIT_FOR_READONLY   _IOR('f', 99, int)
 #endif
 #define EXT4_IOC32_GETVERSION_OLD      FS_IOC32_GETVERSION
 #define EXT4_IOC32_SETVERSION_OLD      FS_IOC32_SETVERSION
+#endif
 
 
 /*
@@ -616,9 +734,8 @@ struct ext4_ext_cache {
  */
 struct ext4_inode_info {
        __le32  i_data[15];     /* unconverted */
-       __u32   i_flags;
-       ext4_fsblk_t    i_file_acl;
        __u32   i_dtime;
+       ext4_fsblk_t    i_file_acl;
 
        /*
         * i_block_group is the number of the block group which contains
@@ -629,6 +746,7 @@ struct ext4_inode_info {
         */
        ext4_group_t    i_block_group;
        unsigned long   i_state_flags;          /* Dynamic state flags */
+       unsigned long   i_flags;
 
        ext4_lblk_t             i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -1062,22 +1180,25 @@ enum {
        EXT4_STATE_DA_ALLOC_CLOSE,      /* Alloc DA blks on close */
        EXT4_STATE_EXT_MIGRATE,         /* Inode is migrating */
        EXT4_STATE_DIO_UNWRITTEN,       /* need convert on dio done*/
+       EXT4_STATE_NEWENTRY,            /* File just added to dir */
 };
 
-static inline int ext4_test_inode_state(struct inode *inode, int bit)
-{
-       return test_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
-
-static inline void ext4_set_inode_state(struct inode *inode, int bit)
-{
-       set_bit(bit, &EXT4_I(inode)->i_state_flags);
+#define EXT4_INODE_BIT_FNS(name, field)                                        \
+static inline int ext4_test_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       return test_bit(bit, &EXT4_I(inode)->i_##field);                \
+}                                                                      \
+static inline void ext4_set_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       set_bit(bit, &EXT4_I(inode)->i_##field);                        \
+}                                                                      \
+static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
+{                                                                      \
+       clear_bit(bit, &EXT4_I(inode)->i_##field);                      \
 }
 
-static inline void ext4_clear_inode_state(struct inode *inode, int bit)
-{
-       clear_bit(bit, &EXT4_I(inode)->i_state_flags);
-}
+EXT4_INODE_BIT_FNS(flag, flags)
+EXT4_INODE_BIT_FNS(state, state_flags)
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
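
The EXT4_INODE_BIT_FNS generator above replaces the hand-written state helpers. A runnable user-space analogue of the same generator trick (struct inode_info and the helper names are illustrative, and plain non-atomic bit ops stand in for the kernel's test_bit()/set_bit()/clear_bit()):

#include <stdio.h>

struct inode_info { unsigned long flags, state_flags; };

/* One macro stamps out a test/set/clear trio per field, keeping the
 * flag and state families in lockstep. */
#define INODE_BIT_FNS(name, field)                                      \
static int  test_##name(struct inode_info *i, int bit)                  \
{ return (int)((i->field >> bit) & 1); }                                \
static void set_##name(struct inode_info *i, int bit)                   \
{ i->field |= 1UL << bit; }                                             \
static void clear_##name(struct inode_info *i, int bit)                 \
{ i->field &= ~(1UL << bit); }

INODE_BIT_FNS(flag, flags)
INODE_BIT_FNS(state, state_flags)

int main(void)
{
        struct inode_info ii = { 0, 0 };

        set_flag(&ii, 12);              /* e.g. EXT4_INODE_INDEX */
        set_state(&ii, 3);              /* e.g. a dynamic state bit */
        printf("flag 12: %d, state 3: %d\n",
               test_flag(&ii, 12), test_state(&ii, 3));
        clear_flag(&ii, 12);
        clear_state(&ii, 3);
        printf("flag 12: %d, state 3: %d\n",
               test_flag(&ii, 12), test_state(&ii, 3));
        return 0;
}
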
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 {
 
 #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
                                      EXT4_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
+                   ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
 #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
 #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
 
@@ -1678,6 +1799,7 @@ struct ext4_group_info {
        ext4_grpblk_t   bb_first_free;  /* first free block */
        ext4_grpblk_t   bb_free;        /* total free blocks */
        ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
+       ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
        struct          list_head bb_prealloc_list;
 #ifdef DOUBLE_CHECK
        void            *bb_bitmap;
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
                                       int chunk);
-extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int max_blocks,
-                              struct buffer_head *bh_result, int flags);
+extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
                          loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                          ssize_t len);
+extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                          struct ext4_map_blocks *map, int flags);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
                           sector_t block, unsigned int max_blocks,
                           struct buffer_head *bh, int flags);
index b79ad51..dade0c0 100644 (file)
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode)
                return 1;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                return 1;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 1;
        return 0;
 }
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode)
                return 0;
        if (!S_ISREG(inode->i_mode))
                return 0;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 0;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
                return 1;
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode)
                return 0;
        if (EXT4_JOURNAL(inode) == NULL)
                return 1;
-       if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                return 0;
        if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
                return 1;
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
                return 0;
        if (!S_ISREG(inode->i_mode))
                return 0;
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return 0;
        if (ext4_should_journal_data(inode))
                return 0;
index 236b834..377309c 100644 (file)
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle,
        if (err <= 0)
                return err;
        err = ext4_truncate_restart_trans(handle, inode, needed);
-       /*
-        * We have dropped i_data_sem so someone might have cached again
-        * an extent we are going to truncate.
-        */
-       ext4_ext_invalidate_cache(inode);
+       if (err == 0)
+               err = -EAGAIN;
 
        return err;
 }
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
                /*
                 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
-                * block groups per flexgroup, reserve the first block 
-                * group for directories and special files.  Regular 
+                * block groups per flexgroup, reserve the first block
+                * group for directories and special files.  Regular
                 * files will start at the second block group.  This
-                * tends to speed up directory access and improves 
+                * tends to speed up directory access and improves
                 * fsck times.
                 */
                block_group &= ~(flex_size-1);
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
        return 0;
 
 corrupted:
-       __ext4_error(inode->i_sb, function,
-                       "bad header/extent in inode #%lu: %s - magic %x, "
+       ext4_error_inode(function, inode,
+                       "bad header/extent: %s - magic %x, "
                        "entries %u, max %u(%u), depth %u(%u)",
-                       inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
+                       error_msg, le16_to_cpu(eh->eh_magic),
                        le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
                        max, le16_to_cpu(eh->eh_depth), depth);
 
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
-                       ext4_error(inode->i_sb,
-                                  "inode#%lu, eh->eh_entries = 0!",
-                                  inode->i_ino);
+                       EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
        }
 
        return merge_done;
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
        struct ext4_ext_cache *cex;
        int ret = EXT4_EXT_CACHE_NO;
 
-       /* 
+       /*
         * We borrow i_block_reservation_lock to protect i_cached_extent
         */
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
        int depth = ext_depth(inode);
        struct ext4_ext_path *path;
        handle_t *handle;
-       int i = 0, err = 0;
+       int i, err;
 
        ext_debug("truncate since %u\n", start);
 
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
+again:
        ext4_ext_invalidate_cache(inode);
 
        /*
         * We start scanning from right side, freeing all the blocks
         * after i_size and walking into the tree depth-wise.
         */
+       depth = ext_depth(inode);
        path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
        if (path == NULL) {
                ext4_journal_stop(handle);
                return -ENOMEM;
        }
+       path[0].p_depth = depth;
        path[0].p_hdr = ext_inode_hdr(inode);
        if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
                err = -EIO;
                goto out;
        }
-       path[0].p_depth = depth;
+       i = err = 0;
 
        while (i >= 0 && err == 0) {
                if (i == depth) {
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 out:
        ext4_ext_drop_refs(path);
        kfree(path);
+       if (err == -EAGAIN)
+               goto again;
        ext4_journal_stop(handle);
 
        return err;
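
The -EAGAIN plumbing above turns a dropped-lock restart into a plain retry at the "again" label. A runnable user-space sketch of that control flow, with do_pass() as a hypothetical stand-in for one truncate pass:

#include <errno.h>
#include <stdio.h>

static int do_pass(int *calls)
{
        return ++(*calls) < 3 ? -EAGAIN : 0;    /* succeed on the third pass */
}

int main(void)
{
        int calls = 0, err;
again:
        err = do_pass(&calls);
        if (err == -EAGAIN)
                goto again;                     /* rescan from the top */
        printf("done after %d passes, err=%d\n", calls, err);
        return 0;
}
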
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error)
 /* FIXME!! we need to try to merge to left or right after zero-out  */
 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
-       int ret = -EIO;
+       int ret;
        struct bio *bio;
        int blkbits, blocksize;
        sector_t ee_pblock;
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
                        len = ee_len;
 
                bio = bio_alloc(GFP_NOIO, len);
+               if (!bio)
+                       return -ENOMEM;
+
                bio->bi_sector = ee_pblock;
                bio->bi_bdev   = inode->i_sb->s_bdev;
 
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
                submit_bio(WRITE, bio);
                wait_for_completion(&event);
 
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       ret = 0;
-               else {
-                       ret = -EIO;
-                       break;
+               if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                       bio_put(bio);
+                       return -EIO;
                }
                bio_put(bio);
                ee_len    -= done;
                ee_pblock += done  << (blkbits - 9);
        }
-       return ret;
+       return 0;
 }
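
The two fixes above make ext4_ext_zeroout() survive a failed bio_alloc()
(returning -ENOMEM) and drop the bio reference before bailing out with
-EIO.  A hedged sketch of the submit-and-wait shape, using the 2.6-era
block calls visible in this hunk (bi_complete() is the completion callback
named in the hunk header); illustrative only, not the function itself:

    static int write_one_bio(struct block_device *bdev, sector_t sector,
                             struct page *page, unsigned int len)
    {
            DECLARE_COMPLETION_ONSTACK(event);
            struct bio *bio = bio_alloc(GFP_NOIO, 1);

            if (!bio)
                    return -ENOMEM;         /* allocation can fail */

            bio->bi_sector  = sector;
            bio->bi_bdev    = bdev;
            bio->bi_private = &event;
            bio->bi_end_io  = bi_complete;  /* completes 'event' */
            bio_add_page(bio, page, len, 0);

            submit_bio(WRITE, bio);
            wait_for_completion(&event);

            if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
                    bio_put(bio);           /* release before erroring out */
                    return -EIO;
            }
            bio_put(bio);
            return 0;
    }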
 
 #define EXT4_EXT_ZERO_LEN 7
 /*
- * This function is called by ext4_ext_get_blocks() if someone tries to write
+ * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
  * extent into multiple extents (up to three - one initialized and two
  * uninitialized).
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  *   c> Splits in three extents: Someone is writing in the middle of the extent
  */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
-                                               struct inode *inode,
-                                               struct ext4_ext_path *path,
-                                               ext4_lblk_t iblock,
-                                               unsigned int max_blocks)
+                                          struct inode *inode,
+                                          struct ext4_map_blocks *map,
+                                          struct ext4_ext_path *path)
 {
        struct ext4_extent *ex, newex, orig_ex;
        struct ext4_extent *ex1 = NULL;
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
        struct ext4_extent_header *eh;
-       ext4_lblk_t ee_block;
+       ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
        int ret = 0;
+       int may_zeroout;
+
+       ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)map->m_lblk, map->m_len);
+
+       eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+               inode->i_sb->s_blocksize_bits;
+       if (eof_block < map->m_lblk + map->m_len)
+               eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (iblock - ee_block);
-       newblock = iblock - ee_block + ext_pblock(ex);
+       allocated = ee_len - (map->m_lblk - ee_block);
+       newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
        ex2 = ex;
        orig_ex.ee_block = ex->ee_block;
        orig_ex.ee_len   = cpu_to_le16(ee_len);
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
+       /*
+        * It is safe to convert an extent to initialized via explicit
+        * zeroout only if the extent is fully inside i_size or new_size.
+        */
+       may_zeroout = ee_block + ee_len <= eof_block;
+
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
        /* If extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+       if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
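
Every zeroout fallback in both conversion paths is now gated on
may_zeroout, derived from eof_block: i_size rounded up to a block
boundary, widened to cover the write itself.  The guard in isolation
(the helper name is made up for illustration); with 4K blocks,
i_size = 5000 rounds up to eof_block = 2:

    static int zeroout_allowed(loff_t i_size, unsigned int blkbits,
                               ext4_lblk_t ee_block, unsigned int ee_len,
                               ext4_lblk_t m_lblk, unsigned int m_len)
    {
            ext4_lblk_t eof_block;

            /* round i_size up to a whole number of blocks */
            eof_block = (i_size + (1 << blkbits) - 1) >> blkbits;
            /* the write may extend the file: use the larger bound */
            if (eof_block < m_lblk + m_len)
                    eof_block = m_lblk + m_len;

            /* zeroing is safe only while the whole extent stays below EOF */
            return ee_block + ee_len <= eof_block;
    }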
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                return allocated;
        }
 
-       /* ex1: ee_block to iblock - 1 : uninitialized */
-       if (iblock > ee_block) {
+       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+       if (map->m_lblk > ee_block) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         * we insert ex3, if ex1 is NULL. This is to avoid temporary
         * overlap of blocks.
         */
-       if (!ex1 && allocated > max_blocks)
-               ex2->ee_len = cpu_to_le16(max_blocks);
+       if (!ex1 && allocated > map->m_len)
+               ex2->ee_len = cpu_to_le16(map->m_len);
        /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unsigned int newdepth;
                /* If extent has less than EXT4_EXT_ZERO_LEN blocks, zero out directly */
-               if (allocated <= EXT4_EXT_ZERO_LEN) {
+               if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
                        /*
-                        * iblock == ee_block is handled by the zerouout
+                        * map->m_lblk == ee_block is handled by the zeroout
                         * at the beginning.
                         * Mark first half uninitialized.
                         * Mark second half initialized and zero out the
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_dirty(handle, inode, path + depth);
 
                        ex3 = &newex;
-                       ex3->ee_block = cpu_to_le32(iblock);
+                       ex3->ee_block = cpu_to_le32(map->m_lblk);
                        ext4_ext_store_pblock(ex3, newblock);
                        ex3->ee_len = cpu_to_le16(allocated);
                        err = ext4_ext_insert_extent(handle, inode, path,
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                ex->ee_len   = orig_ex.ee_len;
                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                                ext4_ext_dirty(handle, inode, path + depth);
-                               /* blocks available from iblock */
+                               /* blocks available from map->m_lblk */
                                return allocated;
 
                        } else if (err)
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                 */
                                depth = ext_depth(inode);
                                ext4_ext_drop_refs(path);
-                               path = ext4_ext_find_extent(inode,
-                                                               iblock, path);
+                               path = ext4_ext_find_extent(inode, map->m_lblk,
+                                                           path);
                                if (IS_ERR(path)) {
                                        err = PTR_ERR(path);
                                        return err;
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        return allocated;
                }
                ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-               ext4_ext_store_pblock(ex3, newblock + max_blocks);
-               ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+               ext4_ext_store_pblock(ex3, newblock + map->m_len);
+               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
                ext4_ext_mark_uninitialized(ex3);
                err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-               if (err == -ENOSPC) {
+               if (err == -ENOSPC && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
 
                } else if (err)
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 * update the extent length after successful insert of the
                 * split extent
                 */
-               orig_ex.ee_len = cpu_to_le16(ee_len -
-                                               ext4_ext_get_actual_len(ex3));
+               ee_len -= ext4_ext_get_actual_len(ex3);
+               orig_ex.ee_len = cpu_to_le16(ee_len);
+               may_zeroout = ee_block + ee_len <= eof_block;
+
                depth = newdepth;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, iblock, path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                if (err)
                        goto out;
 
-               allocated = max_blocks;
+               allocated = map->m_len;
 
                /* If extent has less than EXT4_EXT_ZERO_LEN blocks and we are
                 * trying to insert an extent in the middle, zero out directly,
                 * otherwise give the extent a chance to merge to the left
                 */
                if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-                                                       iblock != ee_block) {
+                       map->m_lblk != ee_block && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zero out the first half */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
                }
        }
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
         */
        if (ex1 && ex1 != ex) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
-       /* ex2: iblock to iblock + maxblocks-1 : initialised */
-       ex2->ee_block = cpu_to_le32(iblock);
+       /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
+       ex2->ee_block = cpu_to_le32(map->m_lblk);
        ext4_ext_store_pblock(ex2, newblock);
        ex2->ee_len = cpu_to_le16(allocated);
        if (ex2 != ex)
@@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        goto out;
 insert:
        err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-       if (err == -ENOSPC) {
+       if (err == -ENOSPC && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
@@ -2904,7 +2923,7 @@ fix_extent_len:
 }
 
 /*
- * This function is called by ext4_ext_get_blocks() from
+ * This function is called by ext4_ext_map_blocks() from
  * ext4_get_blocks_dio_write() when DIO needs to write
  * to an uninitialized extent.
  *
@@ -2927,9 +2946,8 @@ fix_extent_len:
  */
 static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct inode *inode,
+                                       struct ext4_map_blocks *map,
                                        struct ext4_ext_path *path,
-                                       ext4_lblk_t iblock,
-                                       unsigned int max_blocks,
                                        int flags)
 {
        struct ext4_extent *ex, newex, orig_ex;
@@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        struct ext4_extent *ex2 = NULL;
        struct ext4_extent *ex3 = NULL;
        struct ext4_extent_header *eh;
-       ext4_lblk_t ee_block;
+       ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
        ext4_fsblk_t newblock;
        int err = 0;
+       int may_zeroout;
+
+       ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)map->m_lblk, map->m_len);
+
+       eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
+               inode->i_sb->s_blocksize_bits;
+       if (eof_block < map->m_lblk + map->m_len)
+               eof_block = map->m_lblk + map->m_len;
 
-       ext_debug("ext4_split_unwritten_extents: inode %lu,"
-                 "iblock %llu, max_blocks %u\n", inode->i_ino,
-                 (unsigned long long)iblock, max_blocks);
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (iblock - ee_block);
-       newblock = iblock - ee_block + ext_pblock(ex);
+       allocated = ee_len - (map->m_lblk - ee_block);
+       newblock = map->m_lblk - ee_block + ext_pblock(ex);
+
        ex2 = ex;
        orig_ex.ee_block = ex->ee_block;
        orig_ex.ee_len   = cpu_to_le16(ee_len);
        ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
 
        /*
+        * It is safe to convert an extent to initialized via explicit
+        * zeroout only if the extent is fully inside i_size or new_size.
+        */
+       may_zeroout = ee_block + ee_len <= eof_block;
+
+       /*
         * If the uninitialized extent begins at the same logical
         * block where the write begins, and the write completely
         * covers the extent, then we don't need to split it.
         */
-       if ((iblock == ee_block) && (allocated <= max_blocks))
+       if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
                return allocated;
 
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
-       /* ex1: ee_block to iblock - 1 : uninitialized */
-       if (iblock > ee_block) {
+       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
+       if (map->m_lblk > ee_block) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
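
The three-way split falls out of two numbers: how far into the extent the
write starts (ex1) and how much of the extent remains past the requested
length (ex3).  A standalone sketch of that arithmetic (hypothetical helper
mirroring the ex1/ex2/ex3 bookkeeping above):

    struct split_lens {
            unsigned int ex1;       /* uninitialized head, before the write */
            unsigned int ex2;       /* the part covered by the write */
            unsigned int ex3;       /* uninitialized tail, after the write */
    };

    static struct split_lens split_lengths(ext4_lblk_t ee_block,
                                           unsigned int ee_len,
                                           ext4_lblk_t m_lblk,
                                           unsigned int m_len)
    {
            struct split_lens s;
            unsigned int allocated = ee_len - (m_lblk - ee_block);

            s.ex1 = m_lblk - ee_block;  /* 0 if the write starts at the head */
            s.ex2 = min(allocated, m_len);
            s.ex3 = allocated > m_len ? allocated - m_len : 0;
            return s;
    }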
@@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle,
         * we insert ex3, if ex1 is NULL. This is to avoid temporary
         * overlap of blocks.
         */
-       if (!ex1 && allocated > max_blocks)
-               ex2->ee_len = cpu_to_le16(max_blocks);
+       if (!ex1 && allocated > map->m_len)
+               ex2->ee_len = cpu_to_le16(map->m_len);
        /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unsigned int newdepth;
                ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(iblock + max_blocks);
-               ext4_ext_store_pblock(ex3, newblock + max_blocks);
-               ex3->ee_len = cpu_to_le16(allocated - max_blocks);
+               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
+               ext4_ext_store_pblock(ex3, newblock + map->m_len);
+               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
                ext4_ext_mark_uninitialized(ex3);
                err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-               if (err == -ENOSPC) {
+               if (err == -ENOSPC && may_zeroout) {
                        err =  ext4_ext_zeroout(inode, &orig_ex);
                        if (err)
                                goto fix_extent_len;
@@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
-                       /* blocks available from iblock */
+                       /* blocks available from map->m_lblk */
                        return allocated;
 
                } else if (err)
@@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                 * update the extent length after successful insert of the
                 * split extent
                 */
-               orig_ex.ee_len = cpu_to_le16(ee_len -
-                                               ext4_ext_get_actual_len(ex3));
+               ee_len -= ext4_ext_get_actual_len(ex3);
+               orig_ex.ee_len = cpu_to_le16(ee_len);
+               may_zeroout = ee_block + ee_len <= eof_block;
+
                depth = newdepth;
                ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, iblock, path);
+               path = ext4_ext_find_extent(inode, map->m_lblk, path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
@@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                if (err)
                        goto out;
 
-               allocated = max_blocks;
+               allocated = map->m_len;
        }
        /*
         * If there was a change of depth as part of the
@@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle,
         */
        if (ex1 && ex1 != ex) {
                ex1 = ex;
-               ex1->ee_len = cpu_to_le16(iblock - ee_block);
+               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
                ext4_ext_mark_uninitialized(ex1);
                ex2 = &newex;
        }
        /*
-        * ex2: iblock to iblock + maxblocks-1 : to be direct IO written,
-        * uninitialised still.
+        * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
+        * using direct I/O, uninitialised still.
         */
-       ex2->ee_block = cpu_to_le32(iblock);
+       ex2->ee_block = cpu_to_le32(map->m_lblk);
        ext4_ext_store_pblock(ex2, newblock);
        ex2->ee_len = cpu_to_le16(allocated);
        ext4_ext_mark_uninitialized(ex2);
@@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        goto out;
 insert:
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-       if (err == -ENOSPC) {
+       if (err == -ENOSPC && may_zeroout) {
                err =  ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
@@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
 
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock, unsigned int max_blocks,
+                       struct ext4_map_blocks *map,
                        struct ext4_ext_path *path, int flags,
-                       unsigned int allocated, struct buffer_head *bh_result,
-                       ext4_fsblk_t newblock)
+                       unsigned int allocated, ext4_fsblk_t newblock)
 {
        int ret = 0;
        int err = 0;
@@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 
        ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
                  "block %llu, max_blocks %u, flags %d, allocated %u",
-                 inode->i_ino, (unsigned long long)iblock, max_blocks,
+                 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
                  flags, allocated);
        ext4_ext_show_leaf(inode, path);
 
        /* get_block() before submit the IO, split the extent */
        if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-               ret = ext4_split_unwritten_extents(handle,
-                                               inode, path, iblock,
-                                               max_blocks, flags);
+               ret = ext4_split_unwritten_extents(handle, inode, map,
+                                                  path, flags);
                /*
                 * Flag the inode (non-AIO case) or end_io struct (AIO case)
                 * that this I/O needs conversion to written when the I/O is
@@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                else
                        ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
                if (ext4_should_dioread_nolock(inode))
-                       set_buffer_uninit(bh_result);
+                       map->m_flags |= EXT4_MAP_UNINIT;
                goto out;
        }
        /* IO end_io complete, convert the filled extent to written */
@@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                 * the buffer head will be unmapped so that
                 * a read from the block returns 0s.
                 */
-               set_buffer_unwritten(bh_result);
+               map->m_flags |= EXT4_MAP_UNWRITTEN;
                goto out1;
        }
 
        /* buffered write, writepage time, convert*/
-       ret = ext4_ext_convert_to_initialized(handle, inode,
-                                               path, iblock,
-                                               max_blocks);
+       ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
        if (ret >= 0)
                ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -3226,7 +3256,7 @@ out:
                goto out2;
        } else
                allocated = ret;
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
        /*
         * if we allocated more blocks than requested
         * we need to make sure we unmap the extra block
@@ -3234,11 +3264,11 @@ out:
         * unmapped later when we find the buffer_head marked
         * new.
         */
-       if (allocated > max_blocks) {
+       if (allocated > map->m_len) {
                unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
-                                       newblock + max_blocks,
-                                       allocated - max_blocks);
-               allocated = max_blocks;
+                                       newblock + map->m_len,
+                                       allocated - map->m_len);
+               allocated = map->m_len;
        }
 
        /*
@@ -3252,13 +3282,13 @@ out:
                ext4_da_update_reserve_space(inode, allocated, 0);
 
 map_out:
-       set_buffer_mapped(bh_result);
+       map->m_flags |= EXT4_MAP_MAPPED;
 out1:
-       if (allocated > max_blocks)
-               allocated = max_blocks;
+       if (allocated > map->m_len)
+               allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       bh_result->b_bdev = inode->i_sb->s_bdev;
-       bh_result->b_blocknr = newblock;
+       map->m_pblk = newblock;
+       map->m_len = allocated;
 out2:
        if (path) {
                ext4_ext_drop_refs(path);
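
From here on the patch threads a single struct ext4_map_blocks through the
mapping paths in place of the (iblock, max_blocks, bh_result) triple.  The
real definition lives in fs/ext4/ext4.h, which this excerpt does not show;
the fields used here imply roughly this shape:

    struct ext4_map_blocks {
            ext4_fsblk_t m_pblk;    /* out: first physical block */
            ext4_lblk_t  m_lblk;    /* in:  first logical block */
            unsigned int m_len;     /* in: blocks wanted, out: blocks mapped */
            unsigned int m_flags;   /* out: EXT4_MAP_{NEW,MAPPED,UNWRITTEN,
                                     *      UNINIT,BOUNDARY} */
    };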
@@ -3284,26 +3314,23 @@ out2:
  *
  * return < 0, error case.
  */
-int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock,
-                       unsigned int max_blocks, struct buffer_head *bh_result,
-                       int flags)
+int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+                       struct ext4_map_blocks *map, int flags)
 {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex, *last_ex;
        ext4_fsblk_t newblock;
-       int err = 0, depth, ret, cache_type;
+       int i, err = 0, depth, ret, cache_type;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 
-       __clear_bit(BH_New, &bh_result->b_state);
        ext_debug("blocks %u/%u requested for inode %lu\n",
-                       iblock, max_blocks, inode->i_ino);
+                 map->m_lblk, map->m_len, inode->i_ino);
 
        /* check in cache */
-       cache_type = ext4_ext_in_cache(inode, iblock, &newex);
+       cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex);
        if (cache_type) {
                if (cache_type == EXT4_EXT_CACHE_GAP) {
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        /* we should allocate requested block */
                } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
                        /* block is already allocated */
-                       newblock = iblock
+                       newblock = map->m_lblk
                                   - le32_to_cpu(newex.ee_block)
                                   + ext_pblock(&newex);
                        /* number of remaining blocks in the extent */
                        allocated = ext4_ext_get_actual_len(&newex) -
-                                       (iblock - le32_to_cpu(newex.ee_block));
+                               (map->m_lblk - le32_to_cpu(newex.ee_block));
                        goto out;
                } else {
                        BUG();
@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        }
 
        /* find extent for this block */
-       path = ext4_ext_find_extent(inode, iblock, NULL);
+       path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
        if (IS_ERR(path)) {
                err = PTR_ERR(path);
                path = NULL;
@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         */
        if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
                EXT4_ERROR_INODE(inode, "bad extent address "
-                                "iblock: %d, depth: %d pblock %lld",
-                                iblock, depth, path[depth].p_block);
+                                "lblock: %lu, depth: %d pblock %lld",
+                                (unsigned long) map->m_lblk, depth,
+                                path[depth].p_block);
                err = -EIO;
                goto out2;
        }
@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 */
                ee_len = ext4_ext_get_actual_len(ex);
                /* if found extent covers block, simply return it */
-               if (in_range(iblock, ee_block, ee_len)) {
-                       newblock = iblock - ee_block + ee_start;
+               if (in_range(map->m_lblk, ee_block, ee_len)) {
+                       newblock = map->m_lblk - ee_block + ee_start;
                        /* number of remaining blocks in the extent */
-                       allocated = ee_len - (iblock - ee_block);
-                       ext_debug("%u fit into %u:%d -> %llu\n", iblock,
-                                       ee_block, ee_len, newblock);
+                       allocated = ee_len - (map->m_lblk - ee_block);
+                       ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
+                                 ee_block, ee_len, newblock);
 
                        /* Do not put uninitialized extent in the cache */
                        if (!ext4_ext_is_uninitialized(ex)) {
@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                goto out;
                        }
                        ret = ext4_ext_handle_uninitialized_extents(handle,
-                                       inode, iblock, max_blocks, path,
-                                       flags, allocated, bh_result, newblock);
+                                       inode, map, path, flags, allocated,
+                                       newblock);
                        return ret;
                }
        }
@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                 * put just found gap into cache to speed up
                 * subsequent requests
                 */
-               ext4_ext_put_gap_in_cache(inode, path, iblock);
+               ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
                goto out2;
        }
        /*
@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         */
 
        /* find neighbour allocated blocks */
-       ar.lleft = iblock;
+       ar.lleft = map->m_lblk;
        err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
        if (err)
                goto out2;
-       ar.lright = iblock;
+       ar.lright = map->m_lblk;
        err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
        if (err)
                goto out2;
@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
         * EXT_UNINIT_MAX_LEN.
         */
-       if (max_blocks > EXT_INIT_MAX_LEN &&
+       if (map->m_len > EXT_INIT_MAX_LEN &&
            !(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
-               max_blocks = EXT_INIT_MAX_LEN;
-       else if (max_blocks > EXT_UNINIT_MAX_LEN &&
+               map->m_len = EXT_INIT_MAX_LEN;
+       else if (map->m_len > EXT_UNINIT_MAX_LEN &&
                 (flags & EXT4_GET_BLOCKS_UNINIT_EXT))
-               max_blocks = EXT_UNINIT_MAX_LEN;
+               map->m_len = EXT_UNINIT_MAX_LEN;
 
-       /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
-       newex.ee_block = cpu_to_le32(iblock);
-       newex.ee_len = cpu_to_le16(max_blocks);
+       /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
+       newex.ee_block = cpu_to_le32(map->m_lblk);
+       newex.ee_len = cpu_to_le16(map->m_len);
        err = ext4_ext_check_overlap(inode, &newex, path);
        if (err)
                allocated = ext4_ext_get_actual_len(&newex);
        else
-               allocated = max_blocks;
+               allocated = map->m_len;
 
        /* allocate new block */
        ar.inode = inode;
-       ar.goal = ext4_ext_find_goal(inode, path, iblock);
-       ar.logical = iblock;
+       ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+       ar.logical = map->m_lblk;
        ar.len = allocated;
        if (S_ISREG(inode->i_mode))
                ar.flags = EXT4_MB_HINT_DATA;
@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                                                     EXT4_STATE_DIO_UNWRITTEN);
                }
                if (ext4_should_dioread_nolock(inode))
-                       set_buffer_uninit(bh_result);
+                       map->m_flags |= EXT4_MAP_UNINIT;
        }
 
-       if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+       if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
                if (unlikely(!eh->eh_entries)) {
                        EXT4_ERROR_INODE(inode,
-                                        "eh->eh_entries == 0 ee_block %d",
-                                        ex->ee_block);
+                                        "eh->eh_entries == 0 and "
+                                        "EOFBLOCKS_FL set");
                        err = -EIO;
                        goto out2;
                }
                last_ex = EXT_LAST_EXTENT(eh);
-               if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
-                   + ext4_ext_get_actual_len(last_ex))
-                       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+               /*
+                * If the current leaf block was reached by looking at
+                * the last index block all the way down the tree, and
+                * we are extending the inode beyond the last extent
+                * in the current leaf block, then clear the
+                * EOFBLOCKS_FL flag.
+                */
+               for (i = depth-1; i >= 0; i--) {
+                       if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+                               break;
+               }
+               if ((i < 0) &&
+                   (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
+                    ext4_ext_get_actual_len(last_ex)))
+                       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err) {
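
The EOFBLOCKS hunk replaces a bare length check with a walk up the path:
the flag may only be cleared when the write lands in the rightmost leaf of
the tree, i.e. when every index level above the leaf was entered through
its last index.  The test pulled out as a predicate (hypothetical
standalone form of the loop above):

    static int leaf_is_rightmost(struct ext4_ext_path *path, int depth)
    {
            int i;

            /* path[0..depth-1] are the index levels above the leaf */
            for (i = depth - 1; i >= 0; i--)
                    if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
                            return 0;       /* a non-last index was taken */
            return 1;
    }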
@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
        /* previous routine could use block we allocated */
        newblock = ext_pblock(&newex);
        allocated = ext4_ext_get_actual_len(&newex);
-       if (allocated > max_blocks)
-               allocated = max_blocks;
-       set_buffer_new(bh_result);
+       if (allocated > map->m_len)
+               allocated = map->m_len;
+       map->m_flags |= EXT4_MAP_NEW;
 
        /*
         * Update reserved blocks/metadata blocks after successful
@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
         * when it is _not_ an uninitialized extent.
         */
        if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
-               ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+               ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock,
                                                EXT4_EXT_CACHE_EXTENT);
                ext4_update_inode_fsync_trans(handle, inode, 1);
        } else
                ext4_update_inode_fsync_trans(handle, inode, 0);
 out:
-       if (allocated > max_blocks)
-               allocated = max_blocks;
+       if (allocated > map->m_len)
+               allocated = map->m_len;
        ext4_ext_show_leaf(inode, path);
-       set_buffer_mapped(bh_result);
-       bh_result->b_bdev = inode->i_sb->s_bdev;
-       bh_result->b_blocknr = newblock;
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = newblock;
+       map->m_len = allocated;
 out2:
        if (path) {
                ext4_ext_drop_refs(path);
@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
                 * can proceed even if the new size is the same as i_size.
                 */
                if (new_size > i_size_read(inode))
-                       EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
 
 }
@@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode,
 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 {
        handle_t *handle;
-       ext4_lblk_t block;
        loff_t new_size;
        unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
-       struct buffer_head map_bh;
+       struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
        /*
         * currently supporting (pre)allocate mode for extent-based
         * files _only_
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
        /* preallocation to directories is currently not supported */
        if (S_ISDIR(inode->i_mode))
                return -ENODEV;
 
-       block = offset >> blkbits;
+       map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because the range can
         * straddle a block boundary: with blocksize = 4096, offset = 3072
         * and len = 2048, the range touches two blocks even though
         * len >> blkbits == 0.
         */
        max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                                                       - block;
+               - map.m_lblk;
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, max_blocks);
        mutex_lock(&inode->i_mutex);
+       ret = inode_newsize_ok(inode, (len + offset));
+       if (ret) {
+               mutex_unlock(&inode->i_mutex);
+               return ret;
+       }
 retry:
        while (ret >= 0 && ret < max_blocks) {
-               block = block + ret;
-               max_blocks = max_blocks - ret;
+               map.m_lblk = map.m_lblk + ret;
+               map.m_len = max_blocks = max_blocks - ret;
                handle = ext4_journal_start(inode, credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        break;
                }
-               map_bh.b_state = 0;
-               ret = ext4_get_blocks(handle, inode, block,
-                                     max_blocks, &map_bh,
+               ret = ext4_map_blocks(handle, inode, &map,
                                      EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
                if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
                        WARN_ON(ret <= 0);
-                       printk(KERN_ERR "%s: ext4_ext_get_blocks "
+                       printk(KERN_ERR "%s: ext4_ext_map_blocks "
                                    "returned error inode#%lu, block=%u, "
                                    "max_blocks=%u", __func__,
-                                   inode->i_ino, block, max_blocks);
+                                   inode->i_ino, map.m_lblk, max_blocks);
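
Note that map.m_len doubles as input (blocks wanted) and output (blocks
mapped), which is why the loop re-seeds the map on every pass.  The
skeleton, with alloc_range() as a hypothetical stand-in for
ext4_map_blocks():

    int ret = 0;

    while (ret >= 0 && ret < max_blocks) {
            map.m_lblk += ret;              /* skip what the last pass mapped */
            map.m_len = max_blocks -= ret;  /* shrink the remaining request */
            ret = alloc_range(&map);        /* > 0: blocks mapped, < 0: error */
            if (ret <= 0)
                    break;
    }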
@@ -3697,14 +3739,14 @@ retry:
                        ret2 = ext4_journal_stop(handle);
                        break;
                }
-               if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+               if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
                                                blkbits) >> blkbits))
                        new_size = offset + len;
                else
-                       new_size = (block + ret) << blkbits;
+                       new_size = (map.m_lblk + ret) << blkbits;
 
                ext4_falloc_update_inode(inode, mode, new_size,
-                                               buffer_new(&map_bh));
+                                        (map.m_flags & EXT4_MAP_NEW));
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
@@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                                    ssize_t len)
 {
        handle_t *handle;
-       ext4_lblk_t block;
        unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
-       struct buffer_head map_bh;
+       struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
-       block = offset >> blkbits;
+       map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because the range can
         * straddle a block boundary: with blocksize = 4096, offset = 3072
         * and len = 2048, the range touches two blocks even though
         * len >> blkbits == 0.
         */
-       max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
-                                                       - block;
+       max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
+                     map.m_lblk);
        /*
         * credits to insert 1 extent into extent tree
         */
        credits = ext4_chunk_trans_blocks(inode, max_blocks);
        while (ret >= 0 && ret < max_blocks) {
-               block = block + ret;
-               max_blocks = max_blocks - ret;
+               map.m_lblk += ret;
+               map.m_len = (max_blocks -= ret);
                handle = ext4_journal_start(inode, credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        break;
                }
-               map_bh.b_state = 0;
-               ret = ext4_get_blocks(handle, inode, block,
-                                     max_blocks, &map_bh,
+               ret = ext4_map_blocks(handle, inode, &map,
                                      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
                if (ret <= 0) {
                        WARN_ON(ret <= 0);
-                       printk(KERN_ERR "%s: ext4_ext_get_blocks "
+                       printk(KERN_ERR "%s: ext4_ext_map_blocks "
                                    "returned error inode#%lu, block=%u, "
                                    "max_blocks=%u", __func__,
-                                   inode->i_ino, block, max_blocks);
+                                   inode->i_ino, map.m_lblk, map.m_len);
                }
                ext4_mark_inode_dirty(handle, inode);
                ret2 = ext4_journal_stop(handle);
@@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        int error = 0;
 
        /* fallback to generic here if not in extents fmt */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return generic_block_fiemap(inode, fieinfo, start, len,
                        ext4_get_block);
 
index d0776e4..5313ae4 100644 (file)
@@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
         * is smaller than s_maxbytes, which is for extent-mapped files.
         */
 
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
                size_t length = iov_length(iov, nr_segs);
 
index ef3d980..b6a74f9 100644 (file)
 #include <trace/events/ext4.h>
 
 /*
+ * If we're not journaling and this is a just-created file, we have to
+ * sync our parent directory (if it was freshly created) since
+ * otherwise it will only be written by writeback, leaving a huge
+ * window during which a crash may lose the file.  This may apply for
+ * the parent directory's parent as well, and so on recursively, if
+ * they are also freshly created.
+ */
+static void ext4_sync_parent(struct inode *inode)
+{
+       struct dentry *dentry = NULL;
+
+       while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+               ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
+               dentry = list_entry(inode->i_dentry.next,
+                                   struct dentry, d_alias);
+               if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+                       break;
+               inode = dentry->d_parent->d_inode;
+               sync_mapping_buffers(inode->i_mapping);
+       }
+}
+
+/*
  * akpm: A new design for ext4_sync_file().
  *
  * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
@@ -66,9 +89,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
        ret = flush_completed_IO(inode);
        if (ret < 0)
                return ret;
-       
-       if (!journal)
-               return simple_fsync(file, dentry, datasync);
+
+       if (!journal) {
+               ret = simple_fsync(file, dentry, datasync);
+               if (!ret && !list_empty(&inode->i_dentry))
+                       ext4_sync_parent(inode);
+               return ret;
+       }
 
        /*
         * data=writeback,ordered:
@@ -102,7 +129,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
                    (journal->j_flags & JBD2_BARRIER))
                        blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
                                        NULL, BLKDEV_IFL_WAIT);
-               jbd2_log_wait_commit(journal, commit_tid);
+               ret = jbd2_log_wait_commit(journal, commit_tid);
        } else if (journal->j_flags & JBD2_BARRIER)
                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
                        BLKDEV_IFL_WAIT);
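
In no-journal mode a freshly created file's directory entry lives only in
the parent directory's buffers, so syncing the file alone is not enough.
The inode-to-parent hop that ext4_sync_parent() loops over, shown in
isolation (sketch; the non-empty i_dentry check happens at the fsync call
site above):

    static struct inode *first_parent_inode(struct inode *inode)
    {
            /* take the inode's first dentry alias ... */
            struct dentry *dentry = list_entry(inode->i_dentry.next,
                                               struct dentry, d_alias);

            /* ... and step up to the parent directory's inode */
            if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
                    return NULL;
            return dentry->d_parent->d_inode;
    }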
index 1a0e183..25c4b31 100644 (file)
@@ -240,56 +240,49 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
        if (fatal)
                goto error_return;
 
-       /* Ok, now we can actually update the inode bitmaps.. */
-       cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-                                       bit, bitmap_bh->b_data);
-       if (!cleared)
-               ext4_error(sb, "bit already cleared for inode %lu", ino);
-       else {
-               gdp = ext4_get_group_desc(sb, block_group, &bh2);
-
+       fatal = -ESRCH;
+       gdp = ext4_get_group_desc(sb, block_group, &bh2);
+       if (gdp) {
                BUFFER_TRACE(bh2, "get_write_access");
                fatal = ext4_journal_get_write_access(handle, bh2);
-               if (fatal) goto error_return;
-
-               if (gdp) {
-                       ext4_lock_group(sb, block_group);
-                       count = ext4_free_inodes_count(sb, gdp) + 1;
-                       ext4_free_inodes_set(sb, gdp, count);
-                       if (is_directory) {
-                               count = ext4_used_dirs_count(sb, gdp) - 1;
-                               ext4_used_dirs_set(sb, gdp, count);
-                               if (sbi->s_log_groups_per_flex) {
-                                       ext4_group_t f;
-
-                                       f = ext4_flex_group(sbi, block_group);
-                                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
-                               }
+       }
+       ext4_lock_group(sb, block_group);
+       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+       if (fatal || !cleared) {
+               ext4_unlock_group(sb, block_group);
+               goto out;
+       }
 
-                       }
-                       gdp->bg_checksum = ext4_group_desc_csum(sbi,
-                                                       block_group, gdp);
-                       ext4_unlock_group(sb, block_group);
-                       percpu_counter_inc(&sbi->s_freeinodes_counter);
-                       if (is_directory)
-                               percpu_counter_dec(&sbi->s_dirs_counter);
-
-                       if (sbi->s_log_groups_per_flex) {
-                               ext4_group_t f;
-
-                               f = ext4_flex_group(sbi, block_group);
-                               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
-                       }
-               }
-               BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
-               err = ext4_handle_dirty_metadata(handle, NULL, bh2);
-               if (!fatal) fatal = err;
+       count = ext4_free_inodes_count(sb, gdp) + 1;
+       ext4_free_inodes_set(sb, gdp, count);
+       if (is_directory) {
+               count = ext4_used_dirs_count(sb, gdp) - 1;
+               ext4_used_dirs_set(sb, gdp, count);
+               percpu_counter_dec(&sbi->s_dirs_counter);
        }
-       BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
-       err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-       if (!fatal)
-               fatal = err;
-       sb->s_dirt = 1;
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
+       ext4_unlock_group(sb, block_group);
+
+       percpu_counter_inc(&sbi->s_freeinodes_counter);
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t f = ext4_flex_group(sbi, block_group);
+
+               atomic_inc(&sbi->s_flex_groups[f].free_inodes);
+               if (is_directory)
+                       atomic_dec(&sbi->s_flex_groups[f].used_dirs);
+       }
+       BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+       fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
+out:
+       if (cleared) {
+               BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+               if (!fatal)
+                       fatal = err;
+               sb->s_dirt = 1;
+       } else
+               ext4_error(sb, "bit already cleared for inode %lu", ino);
+
 error_return:
        brelse(bitmap_bh);
        ext4_std_error(sb, fatal);
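
The rewrite straightens the locking in ext4_free_inode(): journal write
access is taken first, the bitmap clear and counter updates then happen
inside one group-locked section, and a stale bit is reported only after
unlocking, through a single out: path.  The resulting shape, sketched with
hypothetical helper names:

    fatal = get_write_access(handle, bh2);   /* may fail; checked below */
    lock_group(sb, block_group);
    cleared = clear_bitmap_bit(bit, bitmap);
    if (fatal || !cleared) {
            unlock_group(sb, block_group);
            goto out;                        /* nothing to undo yet */
    }
    update_free_and_dir_counts(gdp);         /* still under the lock */
    recompute_group_checksum(gdp);
    unlock_group(sb, block_group);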
@@ -499,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
 
        if (S_ISDIR(mode) &&
            ((parent == sb->s_root->d_inode) ||
-            (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
+            (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                int best_ndir = inodes_per_group;
                int ret = -1;
 
@@ -1041,7 +1034,7 @@ got:
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                /* set extent flag only for directory, file and normal symlink*/
                if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
-                       EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+                       ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
                        ext4_ext_tree_init(handle, inode);
                }
        }
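
Throughout the patch, open-coded tests such as
EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL become
ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS).  The helpers are defined
in fs/ext4/ext4.h, outside this excerpt; presumably they are thin wrappers
over the generic bitops, something like the sketch below, in which the
EXT4_INODE_* values are bit numbers rather than masks:

    /* sketch: assumes EXT4_I(inode)->i_flags is an unsigned long */
    static inline int ext4_test_inode_flag(struct inode *inode, int bit)
    {
            return test_bit(bit, &EXT4_I(inode)->i_flags);
    }

    static inline void ext4_set_inode_flag(struct inode *inode, int bit)
    {
            set_bit(bit, &EXT4_I(inode)->i_flags);
    }

    static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
    {
            clear_bit(bit, &EXT4_I(inode)->i_flags);
    }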
index 3e0f6af..19df61c 100644 (file)
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
        int ret;
 
        /*
-        * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
+        * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
         * moment, get_block can be called only for blocks inside i_size since
         * page cache has been already dropped and writes are blocked by
         * i_mutex. So we can safely drop the i_data_sem here.
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
-                       __ext4_error(inode->i_sb, function,
-                                  "invalid block reference %u "
-                                  "in inode #%lu", blk, inode->i_ino);
+                       ext4_error_inode(function, inode,
+                                        "invalid block reference %u", blk);
                        return -EIO;
                }
        }
@@ -785,7 +784,7 @@ failed:
        /* Allocation failed, free what we already allocated */
        ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
        for (i = 1; i <= num; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -890,9 +889,9 @@ err_out:
 }
 
 /*
- * The ext4_ind_get_blocks() function handles non-extents inodes
+ * The ext4_ind_map_blocks() function handles non-extents inodes
  * (i.e., using the traditional indirect/double-indirect i_blocks
- * scheme) for ext4_get_blocks().
+ * scheme) for ext4_map_blocks().
  *
  * Allocation strategy is simple: if we have to allocate something, we will
  * have to go the whole way to leaf. So let's do it before attaching anything
@@ -917,9 +916,8 @@ err_out:
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
  * blocks.
  */
-static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
-                              ext4_lblk_t iblock, unsigned int maxblocks,
-                              struct buffer_head *bh_result,
+static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                              struct ext4_map_blocks *map,
                               int flags)
 {
        int err = -EIO;
@@ -933,9 +931,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        int count = 0;
        ext4_fsblk_t first_block = 0;
 
-       J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+       J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-       depth = ext4_block_to_path(inode, iblock, offsets,
+       depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);
 
        if (depth == 0)
@@ -946,10 +944,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
-               clear_buffer_new(bh_result);
                count++;
                /*map more blocks*/
-               while (count < maxblocks && count <= blocks_to_boundary) {
+               while (count < map->m_len && count <= blocks_to_boundary) {
                        ext4_fsblk_t blk;
 
                        blk = le32_to_cpu(*(chain[depth-1].p + count));
@@ -969,7 +966,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
        /*
         * Okay, we need to do block allocation.
        */
-       goal = ext4_find_goal(inode, iblock, partial);
+       goal = ext4_find_goal(inode, map->m_lblk, partial);
 
        /* the number of blocks need to allocate for [d,t]indirect blocks */
        indirect_blks = (chain + depth) - partial - 1;
@@ -979,11 +976,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
         * direct blocks to allocate for this branch.
         */
        count = ext4_blks_to_allocate(partial, indirect_blks,
-                                       maxblocks, blocks_to_boundary);
+                                     map->m_len, blocks_to_boundary);
        /*
         * Block out ext4_truncate while we alter the tree
         */
-       err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
                                &count, goal,
                                offsets + (partial - chain), partial);
 
@@ -995,18 +992,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
         * may need to return -EAGAIN upwards in the worst case.  --sct
         */
        if (!err)
-               err = ext4_splice_branch(handle, inode, iblock,
+               err = ext4_splice_branch(handle, inode, map->m_lblk,
                                         partial, indirect_blks, count);
        if (err)
                goto cleanup;
 
-       set_buffer_new(bh_result);
+       map->m_flags |= EXT4_MAP_NEW;
 
        ext4_update_inode_fsync_trans(handle, inode, 1);
 got_it:
-       map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = le32_to_cpu(chain[depth-1].key);
+       map->m_len = count;
        if (count > blocks_to_boundary)
-               set_buffer_boundary(bh_result);
+               map->m_flags |= EXT4_MAP_BOUNDARY;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
@@ -1016,7 +1015,6 @@ cleanup:
                brelse(partial->bh);
                partial--;
        }
-       BUFFER_TRACE(bh_result, "returned");
 out:
        return err;
 }
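
ext4_ind_map_blocks() now reports its result purely through the map
(m_pblk, m_len, m_flags); the buffer_head bookkeeping the old code did
inline moves out to the callers.  What a get_block-style caller might do
with the result (sketch mirroring the bh calls this hunk removes):

    static void map_result_to_bh(struct super_block *sb,
                                 struct ext4_map_blocks *map,
                                 struct buffer_head *bh)
    {
            if (map->m_flags & EXT4_MAP_MAPPED)
                    map_bh(bh, sb, map->m_pblk);  /* bdev, blocknr, mapped */
            if (map->m_flags & EXT4_MAP_NEW)
                    set_buffer_new(bh);
            if (map->m_flags & EXT4_MAP_BOUNDARY)
                    set_buffer_boundary(bh);
            bh->b_size = sb->s_blocksize * map->m_len;
    }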
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
  */
 static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_calc_metadata_amount(inode, lblock);
 
        return ext4_indirect_calc_metadata_amount(inode, lblock);
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode,
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       int mdb_free = 0, allocated_meta_blocks = 0;
 
        spin_lock(&ei->i_block_reservation_lock);
        trace_ext4_da_update_reserve_space(inode, used);
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
        /* Update per-inode reservations */
        ei->i_reserved_data_blocks -= used;
-       used += ei->i_allocated_meta_blocks;
        ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-       allocated_meta_blocks = ei->i_allocated_meta_blocks;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                          used + ei->i_allocated_meta_blocks);
        ei->i_allocated_meta_blocks = 0;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
 
        if (ei->i_reserved_data_blocks == 0) {
                /*
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode,
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               mdb_free = ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
        }
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       /* Update quota subsystem */
-       if (quota_claim) {
+       /* Update quota subsystem for data blocks */
+       if (quota_claim)
                dquot_claim_block(inode, used);
-               if (mdb_free)
-                       dquot_release_reservation_block(inode, mdb_free);
-       } else {
+       else {
                /*
                 * We did fallocate with an offset that is already delayed
                 * allocated. So on delayed allocated writeback we should
-                * not update the quota for allocated blocks. But then
-                * converting an fallocate region to initialized region would
-                * have caused a metadata allocation. So claim quota for
-                * that
+                * not re-claim the quota for fallocated blocks.
                 */
-               if (allocated_meta_blocks)
-                       dquot_claim_block(inode, allocated_meta_blocks);
-               dquot_release_reservation_block(inode, mdb_free + used);
+               dquot_release_reservation_block(inode, used);
        }
 
        /*
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
                ext4_discard_preallocations(inode);
 }
 
-static int check_block_validity(struct inode *inode, const char *msg,
-                               sector_t logical, sector_t phys, int len)
+static int check_block_validity(struct inode *inode, const char *func,
+                               struct ext4_map_blocks *map)
 {
-       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-               __ext4_error(inode->i_sb, msg,
-                          "inode #%lu logical block %llu mapped to %llu "
-                          "(size %d)", inode->i_ino,
-                          (unsigned long long) logical,
-                          (unsigned long long) phys, len);
+       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+                                  map->m_len)) {
+               ext4_error_inode(func, inode,
+                          "lblock %lu mapped to illegal pblock %llu "
+                          "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_pblk, map->m_len);
                return -EIO;
        }
        return 0;
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 }
 
 /*
- * The ext4_get_blocks() function tries to look up the requested blocks,
+ * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns the mapping if the blocks are already mapped.
  *
  * Otherwise it takes the write lock of the i_data_sem, allocates blocks,
  * stores the allocated blocks in the result buffer head, and marks it
  * mapped.
  *
- * If file type is extents based, it will call ext4_ext_get_blocks(),
- * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
+ * If the file type is extents based, it will call ext4_ext_map_blocks();
+ * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapping
  * based files.
  *
  * On success, it returns the number of blocks being mapped or allocated.
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
  *
  * It returns the error in case of allocation failure.
  */
-int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
-                   unsigned int max_blocks, struct buffer_head *bh,
-                   int flags)
+int ext4_map_blocks(handle_t *handle, struct inode *inode,
+                   struct ext4_map_blocks *map, int flags)
 {
        int retval;
 
-       clear_buffer_mapped(bh);
-       clear_buffer_unwritten(bh);
-
-       ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u,"
-                 "logical block %lu\n", inode->i_ino, flags, max_blocks,
-                 (unsigned long)block);
+       map->m_flags = 0;
+       ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
+                 "logical block %lu\n", inode->i_ino, flags, map->m_len,
+                 (unsigned long) map->m_lblk);
        /*
         * Try to see if we can get the block without requesting a new
         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                               bh, 0);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
-                                            bh, 0);
+               retval = ext4_ind_map_blocks(handle, inode, map, 0);
        }
        up_read((&EXT4_I(inode)->i_data_sem));
 
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system corruption",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode, __func__, map);
                if (ret != 0)
                        return ret;
        }
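
A minimal caller-side sketch of the new interface (a lookup-only probe; inode
and lblk are placeholders here, and flags == 0 asks for no allocation):

    struct ext4_map_blocks map = {
            .m_lblk = lblk,         /* logical block to look up */
            .m_len = 1,             /* probe a single block */
    };
    int ret = ext4_map_blocks(NULL, inode, &map, 0);

    if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
            ext_debug("lblk %lu -> pblk %llu, %u block(s)\n",
                      (unsigned long) map.m_lblk, map.m_pblk, map.m_len);
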
@@ -1277,7 +1260,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * ext4_ext_get_block() returns with create = 0
         * and the buffer head unmapped.
         */
-       if (retval > 0 && buffer_mapped(bh))
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                return retval;
 
        /*
@@ -1290,7 +1273,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * of BH_Unwritten and BH_Mapped flags being simultaneously
         * set on the buffer_head.
         */
-       clear_buffer_unwritten(bh);
+       map->m_flags &= ~EXT4_MAP_UNWRITTEN;
 
        /*
         * New block allocation and/or writing to an uninitialized extent
@@ -1312,14 +1295,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
         */
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
-               retval =  ext4_ext_get_blocks(handle, inode, block, max_blocks,
-                                             bh, flags);
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+               retval = ext4_ext_map_blocks(handle, inode, map, flags);
        } else {
-               retval = ext4_ind_get_blocks(handle, inode, block,
-                                            max_blocks, bh, flags);
+               retval = ext4_ind_map_blocks(handle, inode, map, flags);
 
-               if (retval > 0 && buffer_new(bh)) {
+               if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
                        /*
                         * We allocated new blocks which will result in
                         * i_data's format changing.  Force the migrate
@@ -1342,10 +1323,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 
        up_write((&EXT4_I(inode)->i_data_sem));
-       if (retval > 0 && buffer_mapped(bh)) {
-               int ret = check_block_validity(inode, "file system "
-                                              "corruption after allocation",
-                                              block, bh->b_blocknr, retval);
+       if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
+               int ret = check_block_validity(inode,
+                                              "ext4_map_blocks_after_alloc",
+                                              map);
                if (ret != 0)
                        return ret;
        }
@@ -1355,109 +1336,109 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
-int ext4_get_block(struct inode *inode, sector_t iblock,
-                  struct buffer_head *bh_result, int create)
+static int _ext4_get_block(struct inode *inode, sector_t iblock,
+                          struct buffer_head *bh, int flags)
 {
        handle_t *handle = ext4_journal_current_handle();
+       struct ext4_map_blocks map;
        int ret = 0, started = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
        int dio_credits;
 
-       if (create && !handle) {
+       map.m_lblk = iblock;
+       map.m_len = bh->b_size >> inode->i_blkbits;
+
+       if (flags && !handle) {
                /* Direct IO write... */
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+               if (map.m_len > DIO_MAX_BLOCKS)
+                       map.m_len = DIO_MAX_BLOCKS;
+               dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
                handle = ext4_journal_start(inode, dio_credits);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       goto out;
+                       return ret;
                }
                started = 1;
        }
 
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+       ret = ext4_map_blocks(handle, inode, &map, flags);
        if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
+               map_bh(bh, inode->i_sb, map.m_pblk);
+               bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+               bh->b_size = inode->i_sb->s_blocksize * map.m_len;
                ret = 0;
        }
        if (started)
                ext4_journal_stop(handle);
-out:
        return ret;
 }
 
+int ext4_get_block(struct inode *inode, sector_t iblock,
+                  struct buffer_head *bh, int create)
+{
+       return _ext4_get_block(inode, iblock, bh,
+                              create ? EXT4_GET_BLOCKS_CREATE : 0);
+}
+
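
The one-line splice of map.m_flags into bh->b_state in _ext4_get_block() works
because the EXT4_MAP_* flags are defined to share bit positions with the
matching BH_* state bits. A sketch of that correspondence, assuming the
definitions this series adds to fs/ext4/ext4.h:

    #define EXT4_MAP_NEW        (1 << BH_New)
    #define EXT4_MAP_MAPPED     (1 << BH_Mapped)
    #define EXT4_MAP_UNWRITTEN  (1 << BH_Unwritten)
    #define EXT4_MAP_BOUNDARY   (1 << BH_Boundary)
    #define EXT4_MAP_UNINIT     (1 << BH_Uninit)
    #define EXT4_MAP_FLAGS      (EXT4_MAP_NEW | EXT4_MAP_MAPPED | \
                                 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY | \
                                 EXT4_MAP_UNINIT)

With that layout, (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags clears the
stale mapping bits and installs the new ones in a single assignment.
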
 /*
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
                                ext4_lblk_t block, int create, int *errp)
 {
-       struct buffer_head dummy;
+       struct ext4_map_blocks map;
+       struct buffer_head *bh;
        int fatal = 0, err;
-       int flags = 0;
 
        J_ASSERT(handle != NULL || create == 0);
 
-       dummy.b_state = 0;
-       dummy.b_blocknr = -1000;
-       buffer_trace_init(&dummy.b_history);
-       if (create)
-               flags |= EXT4_GET_BLOCKS_CREATE;
-       err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
-       /*
-        * ext4_get_blocks() returns number of blocks mapped. 0 in
-        * case of a HOLE.
-        */
-       if (err > 0) {
-               if (err > 1)
-                       WARN_ON(1);
-               err = 0;
+       map.m_lblk = block;
+       map.m_len = 1;
+       err = ext4_map_blocks(handle, inode, &map,
+                             create ? EXT4_GET_BLOCKS_CREATE : 0);
+
+       if (err < 0)
+               *errp = err;
+       if (err <= 0)
+               return NULL;
+       *errp = 0;
+
+       bh = sb_getblk(inode->i_sb, map.m_pblk);
+       if (!bh) {
+               *errp = -EIO;
+               return NULL;
        }
-       *errp = err;
-       if (!err && buffer_mapped(&dummy)) {
-               struct buffer_head *bh;
-               bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-               if (!bh) {
-                       *errp = -EIO;
-                       goto err;
-               }
-               if (buffer_new(&dummy)) {
-                       J_ASSERT(create != 0);
-                       J_ASSERT(handle != NULL);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               J_ASSERT(create != 0);
+               J_ASSERT(handle != NULL);
 
-                       /*
-                        * Now that we do not always journal data, we should
-                        * keep in mind whether this should always journal the
-                        * new buffer as metadata.  For now, regular file
-                        * writes use ext4_get_block instead, so it's not a
-                        * problem.
-                        */
-                       lock_buffer(bh);
-                       BUFFER_TRACE(bh, "call get_create_access");
-                       fatal = ext4_journal_get_create_access(handle, bh);
-                       if (!fatal && !buffer_uptodate(bh)) {
-                               memset(bh->b_data, 0, inode->i_sb->s_blocksize);
-                               set_buffer_uptodate(bh);
-                       }
-                       unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-                       err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (!fatal)
-                               fatal = err;
-               } else {
-                       BUFFER_TRACE(bh, "not a new buffer");
-               }
-               if (fatal) {
-                       *errp = fatal;
-                       brelse(bh);
-                       bh = NULL;
+               /*
+                * Now that we do not always journal data, we should
+                * keep in mind whether this should always journal the
+                * new buffer as metadata.  For now, regular file
+                * writes use ext4_get_block instead, so it's not a
+                * problem.
+                */
+               lock_buffer(bh);
+               BUFFER_TRACE(bh, "call get_create_access");
+               fatal = ext4_journal_get_create_access(handle, bh);
+               if (!fatal && !buffer_uptodate(bh)) {
+                       memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+                       set_buffer_uptodate(bh);
                }
-               return bh;
+               unlock_buffer(bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
+               if (!fatal)
+                       fatal = err;
+       } else {
+               BUFFER_TRACE(bh, "not a new buffer");
        }
-err:
-       return NULL;
+       if (fatal) {
+               *errp = fatal;
+               brelse(bh);
+               bh = NULL;
+       }
+       return bh;
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct ext4_inode_info *ei = EXT4_I(inode);
-       unsigned long md_needed, md_reserved;
+       unsigned long md_needed;
        int ret;
 
        /*
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
         */
 repeat:
        spin_lock(&ei->i_block_reservation_lock);
-       md_reserved = ei->i_reserved_meta_blocks;
        md_needed = ext4_calc_metadata_amount(inode, lblock);
        trace_ext4_da_reserve_space(inode, md_needed);
        spin_unlock(&ei->i_block_reservation_lock);
 
        /*
-        * Make quota reservation here to prevent quota overflow
-        * later. Real quota accounting is done at pages writeout
-        * time.
+        * We will charge metadata quota at writeout time; this saves
+        * us from metadata over-estimation, though we may go over by
+        * a small amount in the end.  Here we just reserve for data.
         */
-       ret = dquot_reserve_block(inode, md_needed + 1);
+       ret = dquot_reserve_block(inode, 1);
        if (ret)
                return ret;
-
+       /*
+        * We do still charge estimated metadata to the sb though;
+        * we cannot afford to run out of free blocks.
+        */
        if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-               dquot_release_reservation_block(inode, md_needed + 1);
+               dquot_release_reservation_block(inode, 1);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
                        yield();
                        goto repeat;
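
To make the split accounting above concrete, suppose a 4KB-block filesystem
where ext4_calc_metadata_amount() returns md_needed == 2 for this block: quota
is charged for just 1 block (the data block; metadata quota is deferred until
writeout, when the real cost is known), while the in-filesystem free-space
accounting claims md_needed + 1 == 3 blocks to cover the data block plus the
worst-case metadata estimate. A comment-style summary of the pairing
(paraphrased, not from the patch):

    /* reserve (this hunk):   dquot += 1 block          (data only)
     *                        dirty += md_needed + 1    (data + metadata estimate)
     * writeout (ext4_da_update_reserve_space):
     *                        dquot_claim_block(inode, used)
     *                        dirty -= used + allocated metadata
     */
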
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
+       trace_ext4_da_release_space(inode, to_free);
        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
                 * if there aren't enough reserved blocks, then the
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
                 * only when we have written all of the delayed
                 * allocation blocks.
                 */
-               to_free += ei->i_reserved_meta_blocks;
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                  ei->i_reserved_meta_blocks);
                ei->i_reserved_meta_blocks = 0;
                ei->i_da_metadata_calc_len = 0;
        }
 
-       /* update fs dirty blocks counter */
+       /* update fs dirty data blocks counter */
        percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 /*
  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
  *
- * @mpd->inode - inode to walk through
- * @exbh->b_blocknr - first block on a disk
- * @exbh->b_size - amount of space in bytes
- * @logical - first logical block to start assignment with
- *
  * the function goes through all passed space and puts actual disk
  * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
  */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
-                                struct buffer_head *exbh)
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
+                                struct ext4_map_blocks *map)
 {
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
-       int blocks = exbh->b_size >> inode->i_blkbits;
-       sector_t pblock = exbh->b_blocknr, cur_logical;
+       int blocks = map->m_len;
+       sector_t pblock = map->m_pblk, cur_logical;
        struct buffer_head *head, *bh;
        pgoff_t index, end;
        struct pagevec pvec;
        int nr_pages, i;
 
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
        cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
        pagevec_init(&pvec, 0);
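
The index/end arithmetic above folds logical block numbers into page-cache
indices. A quick worked example, assuming PAGE_CACHE_SHIFT == 12 (4KB pages)
and i_blkbits == 10 (1KB blocks), i.e. four blocks per page:

    /* map->m_lblk == 10, blocks == 6 */
    pgoff_t index = 10 >> (12 - 10);                /* == 2: lblk 10 sits on page 2 */
    pgoff_t end   = (10 + 6 - 1) >> (12 - 10);      /* == 3: lblk 15 sits on page 3 */
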
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 
                        /* skip blocks out of the range */
                        do {
-                               if (cur_logical >= logical)
+                               if (cur_logical >= map->m_lblk)
                                        break;
                                cur_logical++;
                        } while ((bh = bh->b_this_page) != head);
 
                        do {
-                               if (cur_logical >= logical + blocks)
+                               if (cur_logical >= map->m_lblk + blocks)
                                        break;
 
-                               if (buffer_delay(bh) ||
-                                               buffer_unwritten(bh)) {
+                               if (buffer_delay(bh) || buffer_unwritten(bh)) {
 
                                        BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
 
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
                                } else if (buffer_mapped(bh))
                                        BUG_ON(bh->b_blocknr != pblock);
 
-                               if (buffer_uninit(exbh))
+                               if (map->m_flags & EXT4_MAP_UNINIT)
                                        set_buffer_uninit(bh);
                                cur_logical++;
                                pblock++;
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
 }
 
 
-/*
- * __unmap_underlying_blocks - just a helper function to unmap
- * set of blocks described by @bh
- */
-static inline void __unmap_underlying_blocks(struct inode *inode,
-                                            struct buffer_head *bh)
-{
-       struct block_device *bdev = inode->i_sb->s_bdev;
-       int blocks, i;
-
-       blocks = bh->b_size >> inode->i_blkbits;
-       for (i = 0; i < blocks; i++)
-               unmap_underlying_metadata(bdev, bh->b_blocknr + i);
-}
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
                                        sector_t logical, long blk_cnt)
 {
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode)
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
        int err, blks, get_blocks_flags;
-       struct buffer_head new;
+       struct ext4_map_blocks map;
        sector_t next = mpd->b_blocknr;
        unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
        loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
         * variables are updated after the blocks have been allocated.
         */
-       new.b_state = 0;
+       map.m_lblk = next;
+       map.m_len = max_blocks;
        get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
        if (ext4_should_dioread_nolock(mpd->inode))
                get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
        if (mpd->b_state & (1 << BH_Delay))
                get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
-       blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
-                              &new, get_blocks_flags);
+       blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
                err = blks;
                /*
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                ext4_msg(mpd->inode->i_sb, KERN_CRIT,
                         "delayed block allocation failed for inode %lu at "
                         "logical offset %llu with max blocks %zd with "
-                        "error %d\n", mpd->inode->i_ino,
+                        "error %d", mpd->inode->i_ino,
                         (unsigned long long) next,
                         mpd->b_size >> mpd->inode->i_blkbits, err);
                printk(KERN_CRIT "This should not happen!!  "
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
        }
        BUG_ON(blks == 0);
 
-       new.b_size = (blks << mpd->inode->i_blkbits);
+       if (map.m_flags & EXT4_MAP_NEW) {
+               struct block_device *bdev = mpd->inode->i_sb->s_bdev;
+               int i;
 
-       if (buffer_new(&new))
-               __unmap_underlying_blocks(mpd->inode, &new);
+               for (i = 0; i < map.m_len; i++)
+                       unmap_underlying_metadata(bdev, map.m_pblk + i);
+       }
 
        /*
         * If the blocks are marked delayed, we need to
@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         */
        if ((mpd->b_state & (1 << BH_Delay)) ||
            (mpd->b_state & (1 << BH_Unwritten)))
-               mpage_put_bnr_to_bhs(mpd, next, &new);
+               mpage_put_bnr_to_bhs(mpd, &map);
 
        if (ext4_should_order_data(mpd->inode)) {
                err = ext4_jbd2_file_inode(handle, mpd->inode);
@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
        sector_t next;
        int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
 
+       /*
+        * XXX Don't go larger than mballoc is willing to allocate
+        * This is a stopgap solution.  We eventually need to fold
+        * mpage_da_submit_io() into this function and then call
+        * ext4_get_blocks() multiple times in a loop
+        */
+       if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
+               goto flush_it;
+
        /* check if the reserved journal credits might overflow */
-       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
                        /*
                         * With non-extent format we are limited by the journal
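
For the stopgap cap added a few hunks up: 8*1024*1024 is a byte count, so
dividing by s_blocksize yields a block-size-independent ceiling of 8MB on the
accumulated extent. With 4KB blocks the extent is flushed once it reaches
8*1024*1024/4096 == 2048 blocks; with 1KB blocks, at 8192 blocks.
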
@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page,
        struct buffer_head *bh, *head;
        sector_t logical;
 
-       if (mpd->io_done) {
-               /*
-                * Rest of the page in the page_vec
-                * redirty then and skip then. We will
-                * try to write them again after
-                * starting a new transaction
-                */
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return MPAGE_DA_EXTENT_TAIL;
-       }
        /*
         * Can we merge this page into the current extent?
         */
@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page,
  * initialized properly.
  */
 static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
-                                 struct buffer_head *bh_result, int create)
+                                 struct buffer_head *bh, int create)
 {
+       struct ext4_map_blocks map;
        int ret = 0;
        sector_t invalid_block = ~((sector_t) 0xffff);
 
@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                invalid_block = ~0;
 
        BUG_ON(create == 0);
-       BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+       BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+
+       map.m_lblk = iblock;
+       map.m_len = 1;
 
        /*
         * first, we need to know whether the block is allocated already;
         * preallocated blocks are unmapped but should be treated
         * the same as allocated blocks.
         */
-       ret = ext4_get_blocks(NULL, inode, iblock, 1,  bh_result, 0);
-       if ((ret == 0) && !buffer_delay(bh_result)) {
-               /* the block isn't (pre)allocated yet, let's reserve space */
+       ret = ext4_map_blocks(NULL, inode, &map, 0);
+       if (ret < 0)
+               return ret;
+       if (ret == 0) {
+               if (buffer_delay(bh))
+                       return 0; /* Not sure this could or should happen */
                /*
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                        /* not enough space to reserve */
                        return ret;
 
-               map_bh(bh_result, inode->i_sb, invalid_block);
-               set_buffer_new(bh_result);
-               set_buffer_delay(bh_result);
-       } else if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               if (buffer_unwritten(bh_result)) {
-                       /* A delayed write to unwritten bh should
-                        * be marked new and mapped.  Mapped ensures
-                        * that we don't do get_block multiple times
-                        * when we write to the same offset and new
-                        * ensures that we do proper zero out for
-                        * partial write.
-                        */
-                       set_buffer_new(bh_result);
-                       set_buffer_mapped(bh_result);
-               }
-               ret = 0;
+               map_bh(bh, inode->i_sb, invalid_block);
+               set_buffer_new(bh);
+               set_buffer_delay(bh);
+               return 0;
        }
 
-       return ret;
+       map_bh(bh, inode->i_sb, map.m_pblk);
+       bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+
+       if (buffer_unwritten(bh)) {
+               /* A delayed write to unwritten bh should be marked
+                * new and mapped.  Mapped ensures that we don't do
+                * get_block multiple times when we write to the same
+                * offset and new ensures that we do proper zero out
+                * for partial write.
+                */
+               set_buffer_new(bh);
+               set_buffer_mapped(bh);
+       }
+       return 0;
 }
 
 /*
@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
 {
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-
        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
-
-       /*
-        * we don't want to do block allocation in writepage
-        * so call get_block_wrap with create = 0
-        */
-       ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result, 0);
 }
 
 static int bget_one(handle_t *handle, struct buffer_head *bh)
@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
         * number of contiguous blocks. So we will limit
         * the number of contiguous blocks to a sane value
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
 
        return ext4_chunk_trans_blocks(inode, max_blocks);
 }
 
+/*
+ * write_cache_pages_da - walk the list of dirty pages of the given
+ * address space and call the callback function (which usually writes
+ * the pages).
+ *
+ * This is a forked version of write_cache_pages().  Differences:
+ *     range_cyclic is ignored.
+ *     no_nrwrite_index_update is always presumed true
+ */
+static int write_cache_pages_da(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               struct mpage_da_data *mpd)
+{
+       int ret = 0;
+       int done = 0;
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       long nr_to_write = wbc->nr_to_write;
+
+       pagevec_init(&pvec, 0);
+       index = wbc->range_start >> PAGE_CACHE_SHIFT;
+       end = wbc->range_end >> PAGE_CACHE_SHIFT;
+
+       while (!done && (index <= end)) {
+               int i;
+
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                             PAGECACHE_TAG_DIRTY,
+                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
+
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point, the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or
+                        * even swizzled back from swapper_space to tmpfs file
+                        * mapping. However, page->index will not change
+                        * because we have a reference on the page.
+                        */
+                       if (page->index > end) {
+                               done = 1;
+                               break;
+                       }
+
+                       lock_page(page);
+
+                       /*
+                        * Page truncated or invalidated. We can freely skip it
+                        * then, even for data integrity operations: the page
+                        * has disappeared concurrently, so there could be no
+                        * real expectation of this data integrity operation
+                        * even if there is now a new, dirty page at the same
+                        * pagecache address.
+                        */
+                       if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
+                       }
+
+                       if (PageWriteback(page)) {
+                               if (wbc->sync_mode != WB_SYNC_NONE)
+                                       wait_on_page_writeback(page);
+                               else
+                                       goto continue_unlock;
+                       }
+
+                       BUG_ON(PageWriteback(page));
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
+
+                       ret = __mpage_da_writepage(page, wbc, mpd);
+                       if (unlikely(ret)) {
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                               } else {
+                                       done = 1;
+                                       break;
+                               }
+                       }
+
+                       if (nr_to_write > 0) {
+                               nr_to_write--;
+                               if (nr_to_write == 0 &&
+                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                       /*
+                                        * We stop writing back only if we are
+                                        * not doing integrity sync. In case of
+                                        * integrity sync we have to keep going
+                                        * because someone may be concurrently
+                                        * dirtying pages, and we might have
+                                        * synced a lot of newly appeared dirty
+                                        * pages, but have not synced all of the
+                                        * old dirty pages.
+                                        */
+                                       done = 1;
+                                       break;
+                               }
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       return ret;
+}
+
+
 static int ext4_da_writepages(struct address_space *mapping,
                              struct writeback_control *wbc)
 {
@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        handle_t *handle = NULL;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
-       int no_nrwrite_index_update;
        int pages_written = 0;
        long pages_skipped;
        unsigned int max_pages;
@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        mpd.wbc = wbc;
        mpd.inode = mapping->host;
 
-       /*
-        * we don't want write_cache_pages to update
-        * nr_to_write and writeback_index
-        */
-       no_nrwrite_index_update = wbc->no_nrwrite_index_update;
-       wbc->no_nrwrite_index_update = 1;
        pages_skipped = wbc->pages_skipped;
 
 retry:
@@ -2941,7 +3011,7 @@ retry:
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
-                              "%ld pages, ino %lu; err %d\n", __func__,
+                              "%ld pages, ino %lu; err %d", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        goto out_writepages;
                }
@@ -2963,8 +3033,7 @@ retry:
                mpd.io_done = 0;
                mpd.pages_written = 0;
                mpd.retval = 0;
-               ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
-                                       &mpd);
+               ret = write_cache_pages_da(mapping, wbc, &mpd);
                /*
                 * If we have a contiguous extent of pages and we
                 * haven't done the I/O yet, map the blocks and submit
@@ -3016,7 +3085,7 @@ retry:
        if (pages_skipped != wbc->pages_skipped)
                ext4_msg(inode->i_sb, KERN_CRIT,
                         "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d\n",
+                        "with nr_to_write = %ld ret = %d",
                         __func__, wbc->nr_to_write, ret);
 
        /* Update index */
@@ -3030,8 +3099,6 @@ retry:
                mapping->writeback_index = index;
 
 out_writepages:
-       if (!no_nrwrite_index_update)
-               wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
        wbc->range_start = range_start;
        trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
                               loff_t pos, unsigned len, unsigned flags,
                               struct page **pagep, void **fsdata)
 {
-       int ret, retries = 0, quota_retries = 0;
+       int ret, retries = 0;
        struct page *page;
        pgoff_t index;
        unsigned from, to;
@@ -3135,22 +3202,6 @@ retry:
 
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
-
-       if ((ret == -EDQUOT) &&
-           EXT4_I(inode)->i_reserved_meta_blocks &&
-           (quota_retries++ < 3)) {
-               /*
-                * Since we often over-estimate the number of meta
-                * data blocks required, we may sometimes get a
-                * spurios out of quota error even though there would
-                * be enough space once we write the data blocks and
-                * find out how many meta data blocks were _really_
-                * required.  So try forcing the inode write to see if
-                * that helps.
-                */
-               write_inode_now(inode, (quota_retries == 3));
-               goto retry;
-       }
 out:
        return ret;
 }
@@ -3546,46 +3597,18 @@ out:
        return ret;
 }
 
+/*
+ * ext4_get_block() variant used when preparing for a DIO write or buffer write.
+ * We allocate an uninitialized extent if blocks haven't been allocated.
+ * The extent will be converted to initialized after the IO is complete.
+ */
 static int ext4_get_block_write(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create)
 {
-       handle_t *handle = ext4_journal_current_handle();
-       int ret = 0;
-       unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-       int dio_credits;
-       int started = 0;
-
        ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
                   inode->i_ino, create);
-       /*
-        * ext4_get_block in prepare for a DIO write or buffer write.
-        * We allocate an uinitialized extent if blocks haven't been allocated.
-        * The extent will be converted to initialized after IO complete.
-        */
-       create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
-
-       if (!handle) {
-               if (max_blocks > DIO_MAX_BLOCKS)
-                       max_blocks = DIO_MAX_BLOCKS;
-               dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-               handle = ext4_journal_start(inode, dio_credits);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       goto out;
-               }
-               started = 1;
-       }
-
-       ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
-                             create);
-       if (ret > 0) {
-               bh_result->b_size = (ret << inode->i_blkbits);
-               ret = 0;
-       }
-       if (started)
-               ext4_journal_stop(handle);
-out:
-       return ret;
+       return _ext4_get_block(inode, iblock, bh_result,
+                              EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
 static void dump_completed_IO(struct inode *inode)
@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
 
        return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 
        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
-               ext4_error(inode->i_sb, "inode #%lu: "
-                          "attempt to clear blocks %llu len %lu, invalid",
-                          inode->i_ino, (unsigned long long) block_to_free,
-                          count);
+               EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+                                "blocks %llu len %lu",
+                                (unsigned long long) block_to_free, count);
                return 1;
        }
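
EXT4_ERROR_INODE, used here and in the following hunks, is a convenience
wrapper that prefixes the message with the reporting function and the inode
number; presumably defined along these lines in ext4.h:

    #define EXT4_ERROR_INODE(inode, fmt, a...) \
            ext4_error_inode(__func__, (inode), (fmt), ## a)
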
 
@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
-                       ext4_error(inode->i_sb,
-                                  "circular indirect block detected, "
-                                  "inode=%lu, block=%llu",
-                                  inode->i_ino,
-                                  (unsigned long long) this_bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                                        "circular indirect block detected at "
+                                        "block %llu",
+                               (unsigned long long) this_bh->b_blocknr);
        }
 }
 
@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
-                               ext4_error(inode->i_sb,
-                                          "indirect mapped block in inode "
-                                          "#%lu invalid (level %d, blk #%lu)",
-                                          inode->i_ino, depth,
-                                          (unsigned long) nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "invalid indirect mapped "
+                                                "block %lu (level %d)",
+                                                (unsigned long) nr, depth);
                                break;
                        }
 
@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         * (should be rare).
                         */
                        if (!bh) {
-                               ext4_error(inode->i_sb,
-                                          "Read failure, inode=%lu, block=%llu",
-                                          inode->i_ino, nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "Read failure block=%llu",
+                                                (unsigned long long) nr);
                                continue;
                        }
 
@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode)
        if (!ext4_can_truncate(inode))
                return;
 
-       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 
        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
                return;
        }
@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
        bh = sb_getblk(sb, block);
        if (!bh) {
-               ext4_error(sb, "unable to read inode block - "
-                          "inode=%lu, block=%llu", inode->i_ino, block);
+               EXT4_ERROR_INODE(inode, "unable to read inode block - "
+                                "block %llu", block);
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@ -4884,8 +4904,8 @@ make_io:
                submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       ext4_error(sb, "unable to read inode block - inode=%lu,"
-                                  " block=%llu", inode->i_ino, block);
+                       EXT4_ERROR_INODE(inode, "unable to read inode "
+                                        "block %llu", block);
                        brelse(bh);
                        return -EIO;
                }
@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        ret = 0;
        if (ei->i_file_acl &&
            !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-               ext4_error(sb, "bad extended attribute block %llu inode #%lu",
-                          ei->i_file_acl, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+                                ei->i_file_acl);
                ret = -EIO;
                goto bad_inode;
        } else if (ei->i_flags & EXT4_EXTENTS_FL) {
@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
                ret = -EIO;
-               ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
-                          inode->i_mode, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
        brelse(iloc.bh);
@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
                if (wbc->sync_mode == WB_SYNC_ALL)
                        sync_dirty_buffer(iloc.bh);
                if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       ext4_error(inode->i_sb, "IO error syncing inode, "
-                                  "inode=%lu, block=%llu", inode->i_ino,
-                                  (unsigned long long)iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                               "IO error syncing inode (block=%llu)",
+                               (unsigned long long) iloc.bh->b_blocknr);
                        err = -EIO;
                }
                brelse(iloc.bh);
@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
                        if (attr->ia_size > sbi->s_bitmap_maxbytes) {
@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE &&
            (attr->ia_size < inode->i_size ||
-            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
+            (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
                handle_t *handle;
 
                handle = ext4_journal_start(inode, 3);
@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        }
                }
                /* ext4_truncate will clear the flag */
-               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+               if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
                        ext4_truncate(inode);
        }
 
@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 
 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
 }
@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         */
 
        if (val)
-               EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else
-               EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        ext4_set_aops(inode);
 
        jbd2_journal_unlock_updates(journal);
index 016d024..bf5ae88 100644
@@ -258,7 +258,7 @@ setversion_out:
                if (me.moved_len > 0)
                        file_remove_suid(donor_filp);
 
-               if (copy_to_user((struct move_extent __user *)arg, 
+               if (copy_to_user((struct move_extent __user *)arg,
                                 &me, sizeof(me)))
                        err = -EFAULT;
 mext_out:
@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case EXT4_IOC32_SETRSVSZ:
                cmd = EXT4_IOC_SETRSVSZ;
                break;
-       case EXT4_IOC_GROUP_ADD:
+       case EXT4_IOC32_GROUP_ADD: {
+               struct compat_ext4_new_group_input __user *uinput;
+               struct ext4_new_group_input input;
+               mm_segment_t old_fs;
+               int err;
+
+               uinput = compat_ptr(arg);
+               err = get_user(input.group, &uinput->group);
+               err |= get_user(input.block_bitmap, &uinput->block_bitmap);
+               err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
+               err |= get_user(input.inode_table, &uinput->inode_table);
+               err |= get_user(input.blocks_count, &uinput->blocks_count);
+               err |= get_user(input.reserved_blocks,
+                               &uinput->reserved_blocks);
+               if (err)
+                       return -EFAULT;
+               old_fs = get_fs();
+               set_fs(KERNEL_DS);
+               err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
+                                (unsigned long) &input);
+               set_fs(old_fs);
+               return err;
+       }
+       case EXT4_IOC_MOVE_EXT:
                break;
        default:
                return -ENOIOCTLCMD;
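
The field-by-field get_user() copying in the EXT4_IOC32_GROUP_ADD case is
necessary because a 32-bit caller lays out ext4_new_group_input with different
alignment and padding than the native 64-bit struct. A sketch of the compat
counterpart the handler reads from, using compat_u64 for the 4-byte-aligned
64-bit fields (illustrative; field names mirror the native struct):

    struct compat_ext4_new_group_input {
            u32 group;                      /* group number for this data */
            compat_u64 block_bitmap;        /* absolute block number of block bitmap */
            compat_u64 inode_bitmap;        /* absolute block number of inode bitmap */
            compat_u64 inode_table;         /* absolute block number of inode table start */
            u32 blocks_count;               /* total number of blocks in this group */
            u16 reserved_blocks;            /* number of reserved blocks in this group */
            u16 unused;
    };
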
index b423a36..12b3bc0 100644
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        }
 }
 
+/*
+ * Cache the order of the largest free extent we have available in this block
+ * group.
+ */
+static void
+mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+{
+       int i;
+       int bits;
+
+       grp->bb_largest_free_order = -1; /* uninit */
+
+       bits = sb->s_blocksize_bits + 1;
+       for (i = bits; i >= 0; i--) {
+               if (grp->bb_counters[i] > 0) {
+                       grp->bb_largest_free_order = i;
+                       break;
+               }
+       }
+}
+
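
A worked example of what the scan above caches, assuming s_blocksize_bits == 12
(4KB blocks), so orders 13 down to 0 are examined:

    /* bb_counters[]: [0] = 5, [1] = 2, [4] = 1, all higher orders zero
     * => the first nonzero counter from the top is at order 4
     * => bb_largest_free_order = 4: a 2^4 == 16-block free chunk exists
     */
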
 static noinline_for_stack
 void ext4_mb_generate_buddy(struct super_block *sb,
                                void *buddy, void *bitmap, ext4_group_t group)
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
                 */
                grp->bb_free = free;
        }
+       mb_set_largest_free_order(sb, grp);
 
        clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
 
@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
  * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
  * So it can have information regarding groups_per_page which
  * is blocks_per_page/2
+ *
+ * Locking note:  This routine takes the block group lock of all groups
+ * for this page; do not hold this lock when calling this routine!
  */
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore == NULL);
                        mb_debug(1, "put buddy for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_buddy_bitmap_load(sb, group);
                        grinfo = ext4_get_group_info(sb, group);
                        grinfo->bb_fragments = 0;
                        memset(grinfo->bb_counters, 0,
@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore != NULL);
                        mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_bitmap_load(sb, group);
 
                        /* see comments in ext4_mb_put_pa() */
                        ext4_lock_group(sb, group);
@@ -910,6 +937,11 @@ out:
        return err;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack
 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
@@ -1004,6 +1036,11 @@ err:
        return ret;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                                        struct ext4_buddy *e4b)
@@ -1150,7 +1187,7 @@ err:
        return ret;
 }
 
-static void ext4_mb_release_desc(struct ext4_buddy *e4b)
+static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
        if (e4b->bd_bitmap_page)
                page_cache_release(e4b->bd_bitmap_page);
@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        buddy = buddy2;
                } while (1);
        }
+       mb_set_largest_free_order(sb, e4b->bd_info);
        mb_check_buddy(e4b);
 }
 
@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
                e4b->bd_info->bb_counters[ord]++;
                e4b->bd_info->bb_counters[ord]++;
        }
+       mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
        mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
        mb_check_buddy(e4b);
@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
        }
 
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
                ext4_mb_use_best_found(ac, e4b);
        }
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        }
 }
 
+/* This is now called BEFORE we load the buddy bitmap. */
 static int ext4_mb_good_group(struct ext4_allocation_context *ac,
                                ext4_group_t group, int cr)
 {
        unsigned free, fragments;
-       unsigned i, bits;
        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
        struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
        BUG_ON(cr < 0 || cr >= 4);
-       BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
+
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               if (ret)
+                       return 0;
+       }
 
        free = grp->bb_free;
        fragments = grp->bb_fragments;
@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        case 0:
                BUG_ON(ac->ac_2order == 0);
 
+               if (grp->bb_largest_free_order < ac->ac_2order)
+                       return 0;
+
                /* Avoid using the first bg of a flexgroup for data files */
                if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
                    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
                    ((group % flex_size) == 0))
                        return 0;
 
-               bits = ac->ac_sb->s_blocksize_bits + 1;
-               for (i = ac->ac_2order; i <= bits; i++)
-                       if (grp->bb_counters[i] > 0)
-                               return 1;
-               break;
+               return 1;
        case 1:
                if ((free / fragments) >= ac->ac_g_ex.fe_len)
                        return 1;
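
The old cr == 0 path walked bb_counters[] upward from ac_2order on every call; with bb_largest_free_order kept current by mb_set_largest_free_order(), the same question costs one comparison. A standalone sketch of the equivalence (illustrative only, not ext4 code):

static int order_available_scan(const unsigned int *counters,
                                int max_order, int k)
{
        int i;

        /* old approach: scan every order >= k, O(max_order) */
        for (i = k; i <= max_order; i++)
                if (counters[i] > 0)
                        return 1;
        return 0;
}

static int order_available_cached(int largest_free_order, int k)
{
        /* new approach: one comparison, O(1) */
        return largest_free_order >= k;
}
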
@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
        sbi = EXT4_SB(sb);
        ngroups = ext4_get_groups_count(sb);
        /* non-extent files are limited to low blocks/groups */
-       if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
                ngroups = sbi->s_blockfile_groups;
 
        BUG_ON(ac->ac_status == AC_STATUS_FOUND);
@@ -2024,15 +2068,11 @@ repeat:
                group = ac->ac_g_ex.fe_group;
 
                for (i = 0; i < ngroups; group++, i++) {
-                       struct ext4_group_info *grp;
-                       struct ext4_group_desc *desc;
-
                        if (group == ngroups)
                                group = 0;
 
-                       /* quick check to skip empty groups */
-                       grp = ext4_get_group_info(sb, group);
-                       if (grp->bb_free == 0)
+                       /* This now checks without needing the buddy page */
+                       if (!ext4_mb_good_group(ac, group, cr))
                                continue;
 
                        err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2040,15 +2080,18 @@ repeat:
                                goto out;
 
                        ext4_lock_group(sb, group);
+
+                       /*
+                        * We need to check again after locking the
+                        * block group
+                        */
                        if (!ext4_mb_good_group(ac, group, cr)) {
-                               /* someone did allocation from this group */
                                ext4_unlock_group(sb, group);
-                               ext4_mb_release_desc(&e4b);
+                               ext4_mb_unload_buddy(&e4b);
                                continue;
                        }
 
                        ac->ac_groups_scanned++;
-                       desc = ext4_get_group_desc(sb, group, NULL);
                        if (cr == 0)
                                ext4_mb_simple_scan_group(ac, &e4b);
                        else if (cr == 1 &&
@@ -2058,7 +2101,7 @@ repeat:
                                ext4_mb_complex_scan_group(ac, &e4b);
 
                        ext4_unlock_group(sb, group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
 
                        if (ac->ac_status != AC_STATUS_CONTINUE)
                                break;
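
The allocator loop above now uses a check/lock/recheck idiom: an unlocked ext4_mb_good_group() call filters groups cheaply (and may race with other allocators), and the verdict is confirmed under ext4_lock_group() before any scanning happens. A generic sketch of the idiom (types and helpers are hypothetical):

struct group;
int  good_group(struct group *g);
void lock_group(struct group *g);
void unlock_group(struct group *g);
void scan_group(struct group *g);

static void scan_groups(struct group **groups, int ngroups)
{
        int i;

        for (i = 0; i < ngroups; i++) {
                struct group *g = groups[i];

                if (!good_group(g))             /* unlocked, may race */
                        continue;
                lock_group(g);
                if (!good_group(g)) {           /* recheck under the lock */
                        unlock_group(g);
                        continue;
                }
                scan_group(g);
                unlock_group(g);
        }
}
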
@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        ext4_lock_group(sb, group);
        memcpy(&sg, ext4_get_group_info(sb, group), i);
        ext4_unlock_group(sb, group);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
                        sg.info.bb_fragments, sg.info.bb_first_free);
@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
        init_rwsem(&meta_group_info[i]->alloc_sem);
        meta_group_info[i]->bb_free_root = RB_ROOT;
+       meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
 
 #ifdef DOUBLE_CHECK
        {
@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                         entry->count, entry->group, entry);
 
                if (test_opt(sb, DISCARD)) {
+                       int ret;
                        ext4_fsblk_t discard_block;
 
                        discard_block = entry->start_blk +
@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        trace_ext4_discard_blocks(sb,
                                        (unsigned long long)discard_block,
                                        entry->count);
-                       sb_issue_discard(sb, discard_block, entry->count);
+                       ret = sb_issue_discard(sb, discard_block, entry->count);
+                       if (ret == -EOPNOTSUPP) {
+                               ext4_warning(sb,
+                                       "discard not supported, disabling");
+                               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+                       }
                }
 
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
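
The discard hunk above probes the device once and disables the mount option for good when the request is unsupported, so later transactions skip the call entirely. A minimal sketch of that probe-and-disable shape (all names hypothetical; assumes the backend returns a conventional negative errno):

struct mount_ctx {
        int discard_enabled;
};

int backend_discard(struct mount_ctx *c, unsigned long long block,
                    unsigned long count);       /* hypothetical backend */

static int discard_once(struct mount_ctx *c, unsigned long long block,
                        unsigned long count)
{
        int ret;

        if (!c->discard_enabled)
                return 0;
        ret = backend_discard(c, block, count);
        if (ret == -EOPNOTSUPP)
                c->discard_enabled = 0;         /* never ask this device again */
        return ret;
}
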
@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                }
                ext4_unlock_group(sb, entry->group);
                kmem_cache_free(ext4_free_ext_cachep, entry);
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
        }
 
        mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void)
 
 void exit_ext4_mballoc(void)
 {
-       /* 
+       /*
         * Wait for completion of call_rcu()'s on ext4_pspace_cachep
         * before destroying the slab cache.
         */
@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
        if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
                atomic_inc(&sbi->s_bal_reqs);
                atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
-               if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
+               if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
                        atomic_inc(&sbi->s_bal_success);
                atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
                if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        continue;
 
                /* non-extent files can't have physical blocks past 2^32 */
-               if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
+               if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
                        pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
                        continue;
 
@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
        spin_unlock(&pa->pa_lock);
 
        grp_blk = pa->pa_pstart;
-       /* 
+       /*
         * If doing group-based preallocation, pa_pstart may be in the
         * next group when pa is used up
         */
@@ -3697,7 +3747,7 @@ out:
        ext4_unlock_group(sb, group);
        if (ac)
                kmem_cache_free(ext4_ac_cachep, ac);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
        put_bh(bitmap_bh);
        return free;
 }
@@ -3801,7 +3851,7 @@ repeat:
                if (bitmap_bh == NULL) {
                        ext4_error(sb, "Error reading block bitmap for %u",
                                        group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
                        continue;
                }
 
@@ -3810,7 +3860,7 @@ repeat:
                ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                put_bh(bitmap_bh);
 
                list_del(&pa->u.pa_tmp_list);
@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
                ext4_mb_release_group_pa(&e4b, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        if (!bh)
                                tbh = sb_find_get_block(inode->i_sb,
                                                        block + i);
-                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
                                    inode, tbh, block + i);
                }
        }
 
-       /* 
+       /*
         * We need to make sure we don't reuse the freed block until
         * after the transaction is committed, which we can do by
         * treating the block as metadata, below.  We make an
@@ -4610,7 +4660,7 @@ do_more:
                atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
        }
 
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        freed += count;
 
index 34dcfc5..6f3a27e 100644 (file)
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
         */
        if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
                                       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
-           (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+           (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EINVAL;
 
        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
index d1fc662..3a6c92a 100644 (file)
@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
        int depth = ext_depth(orig_inode);
        int ret;
 
+       start_ext.ee_block = end_ext.ee_block = 0;
        o_start = o_end = oext = orig_path[depth].p_ext;
        oext_alen = ext4_ext_get_actual_len(oext);
        start_ext.ee_len = end_ext.ee_len = 0;
@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
         * new_ext       |-------|
         */
        if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-               ext4_error(orig_inode->i_sb,
+               EXT4_ERROR_INODE(orig_inode,
                        "new_ext_end(%u) should be less than or equal to "
                        "oext->ee_block(%u) + oext_alen(%d) - 1",
                        new_ext_end, le32_to_cpu(oext->ee_block),
@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
        while (1) {
                /* The extent for donor must be found. */
                if (!dext) {
-                       ext4_error(donor_inode->i_sb,
+                       EXT4_ERROR_INODE(donor_inode,
                                   "The extent for donor must be found");
                        *err = -EIO;
                        goto out;
                } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-                       ext4_error(donor_inode->i_sb,
+                       EXT4_ERROR_INODE(donor_inode,
                                "Donor offset(%u) and the first block of donor "
                                "extent(%u) should be equal",
                                donor_off,
@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode,
        }
 
        /* Ext4 move extent supports only extent based file */
-       if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: orig file is not extents "
                        "based file [ino:orig %lu]\n", orig_inode->i_ino);
                return -EOPNOTSUPP;
-       } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: donor file is not extents "
                        "based file [ino:donor %lu]\n", donor_inode->i_ino);
                return -EOPNOTSUPP;
@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                        if (ret1 < 0)
                                break;
                        if (*moved_len > len) {
-                               ext4_error(orig_inode->i_sb,
+                               EXT4_ERROR_INODE(orig_inode,
                                        "We replaced blocks too much! "
                                        "sum of replaced: %llu requested: %llu",
                                        *moved_len, len);
index 0c070fa..a43e661 100644 (file)
@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
                return blocksize;
        return (len & 65532) | ((len & 3) << 16);
 }
-  
+
 __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
 {
        if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
        if (len == blocksize) {
                if (blocksize == 65536)
                        return cpu_to_le16(EXT4_MAX_REC_LEN);
-               else 
+               else
                        return cpu_to_le16(0);
        }
        return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
@@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                brelse(bh);
        }
        if (bcount)
-               printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", 
+               printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
                       levels ? "" : "   ", names, space/bcount,
                       (space/bcount)*100/blocksize);
        return (struct stats) { names, space, bcount};
@@ -653,10 +653,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
        int ret, err;
        __u32 hashval;
 
-       dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 
+       dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
                       start_hash, start_minor_hash));
        dir = dir_file->f_path.dentry->d_inode;
-       if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
+       if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
                if (hinfo.hash_version <= DX_HASH_TEA)
                        hinfo.hash_version +=
@@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode)
 {
        if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
                                     EXT4_FEATURE_COMPAT_DIR_INDEX))
-               EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
 }
 
 /*
@@ -943,8 +943,8 @@ restart:
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
                        /* read error, skip block & hope for the best */
-                       ext4_error(sb, "reading directory #%lu offset %lu",
-                                  dir->i_ino, (unsigned long)block);
+                       EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
+                                        (unsigned long) block);
                        brelse(bh);
                        goto next;
                }
@@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
                __u32 ino = le32_to_cpu(de->inode);
                brelse(bh);
                if (!ext4_valid_inum(dir->i_sb, ino)) {
-                       ext4_error(dir->i_sb, "bad inode number: %u", ino);
+                       EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
                inode = ext4_iget(dir->i_sb, ino);
                if (unlikely(IS_ERR(inode))) {
                        if (PTR_ERR(inode) == -ESTALE) {
-                               ext4_error(dir->i_sb,
-                                               "deleted inode referenced: %u",
-                                               ino);
+                               EXT4_ERROR_INODE(dir,
+                                                "deleted inode referenced: %u",
+                                                ino);
                                return ERR_PTR(-EIO);
                        } else {
                                return ERR_CAST(inode);
@@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
        brelse(bh);
 
        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-               ext4_error(child->d_inode->i_sb,
-                          "bad inode number: %u", ino);
+               EXT4_ERROR_INODE(child->d_inode,
+                                "bad parent inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
 
@@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
        unsigned rec_len = 0;
 
        while (count--) {
-               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
                                                (from + (map->offs<<2));
                rec_len = EXT4_DIR_REC_LEN(de->name_len);
                memcpy (to, de, rec_len);
@@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
        de = (struct ext4_dir_entry_2 *)((char *)fde +
                ext4_rec_len_from_disk(fde->rec_len, blocksize));
        if ((char *) de >= (((char *) root) + blocksize)) {
-               ext4_error(dir->i_sb,
-                          "invalid rec_len for '..' in inode %lu",
-                          dir->i_ino);
+               EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
                brelse(bh);
                return -EIO;
        }
@@ -1418,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                brelse(bh);
                return retval;
        }
-       EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
+       ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
        data1 = bh2->b_data;
 
        memcpy (data1, de, len);
@@ -1491,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                retval = ext4_dx_add_entry(handle, dentry, inode);
                if (!retval || (retval != ERR_BAD_DX_DIR))
                        return retval;
-               EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL;
+               ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
                dx_fallback++;
                ext4_mark_inode_dirty(handle, dir);
        }
@@ -1519,6 +1517,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
        de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
        retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
        brelse(bh);
+       if (retval == 0)
+               ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
        return retval;
 }
 
@@ -1915,9 +1915,8 @@ static int empty_dir(struct inode *inode)
        if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
            !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
                if (err)
-                       ext4_error(inode->i_sb,
-                                  "error %d reading directory #%lu offset 0",
-                                  err, inode->i_ino);
+                       EXT4_ERROR_INODE(inode,
+                               "error %d reading directory lblock 0", err);
                else
                        ext4_warning(inode->i_sb,
                                     "bad directory (dir #%lu) - no data block",
@@ -1941,17 +1940,17 @@ static int empty_dir(struct inode *inode)
        de = ext4_next_entry(de1, sb->s_blocksize);
        while (offset < inode->i_size) {
                if (!bh ||
-                       (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+                   (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+                       unsigned int lblock;
                        err = 0;
                        brelse(bh);
-                       bh = ext4_bread(NULL, inode,
-                               offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
+                       lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
+                       bh = ext4_bread(NULL, inode, lblock, 0, &err);
                        if (!bh) {
                                if (err)
-                                       ext4_error(sb,
-                                                  "error %d reading directory"
-                                                  " #%lu offset %u",
-                                                  err, inode->i_ino, offset);
+                                       EXT4_ERROR_INODE(inode,
+                                               "error %d reading directory "
+                                               "lblock %u", err, lblock);
                                offset += sb->s_blocksize;
                                continue;
                        }
@@ -2297,7 +2296,7 @@ retry:
                }
        } else {
                /* clear the extent format for fast symlink */
-               EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
                inode->i_op = &ext4_fast_symlink_inode_operations;
                memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
                inode->i_size = l-1;
index 5692c48..6df797e 100644 (file)
@@ -911,7 +911,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        percpu_counter_add(&sbi->s_freeinodes_counter,
                           EXT4_INODES_PER_GROUP(sb));
 
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+           sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group;
                flex_group = ext4_flex_group(sbi, input->group);
                atomic_add(input->free_blocks_count,
index e14d22c..49d88c0 100644 (file)
@@ -241,6 +241,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
        if (sb->s_flags & MS_RDONLY)
                return ERR_PTR(-EROFS);
 
+       vfs_check_frozen(sb, SB_FREEZE_WRITE);
        /* Special case here: if the journal has aborted behind our
         * backs (eg. EIO in the commit thread), then we still need to
         * take the FS itself readonly cleanly. */
@@ -941,6 +942,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
        if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
                seq_puts(seq, ",journal_async_commit");
+       else if (test_opt(sb, JOURNAL_CHECKSUM))
+               seq_puts(seq, ",journal_checksum");
        if (test_opt(sb, NOBH))
                seq_puts(seq, ",nobh");
        if (test_opt(sb, I_VERSION))
@@ -2213,7 +2216,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 struct ext4_attr {
        struct attribute attr;
        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
-       ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 
+       ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
                         const char *, size_t);
        int offset;
 };
@@ -2430,6 +2433,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                __releases(kernel_lock)
                                __acquires(kernel_lock)
 {
+       char *orig_data = kstrdup(data, GFP_KERNEL);
        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
        struct ext4_sb_info *sbi;
@@ -2793,24 +2797,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
 
-       err = percpu_counter_init(&sbi->s_freeblocks_counter,
-                       ext4_count_free_blocks(sb));
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_freeinodes_counter,
-                               ext4_count_free_inodes(sb));
-       }
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_dirs_counter,
-                               ext4_count_dirs(sb));
-       }
-       if (!err) {
-               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-       }
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "insufficient memory");
-               goto failed_mount3;
-       }
-
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
 
@@ -2910,6 +2896,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
 no_journal:
+       err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                                 ext4_count_free_blocks(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                                         ext4_count_free_inodes(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_dirs_counter,
+                                         ext4_count_dirs(sb));
+       if (!err)
+               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+       if (err) {
+               ext4_msg(sb, KERN_ERR, "insufficient memory");
+               goto failed_mount_wq;
+       }
        if (test_opt(sb, NOBH)) {
                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
                        ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
@@ -3001,7 +3001,7 @@ no_journal:
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
-                        "zone (%d)\n", err);
+                        "zone (%d)", err);
                goto failed_mount4;
        }
 
@@ -3040,9 +3040,11 @@ no_journal:
        } else
                descr = "out journal";
 
-       ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
+       ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+               "Opts: %s", descr, orig_data);
 
        lock_kernel();
+       kfree(orig_data);
        return 0;
 
 cantfind_ext4:
@@ -3059,6 +3061,10 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
+       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeinodes_counter);
+       percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3066,10 +3072,6 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
-       percpu_counter_destroy(&sbi->s_freeinodes_counter);
-       percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
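
With the percpu counters now initialized after the journal is set up (journal replay can change the free-block and free-inode counts they cache), their teardown moves from failed_mount3 up to failed_mount_wq so the error path still unwinds in reverse order of setup. A generic sketch of that goto-unwind idiom (hypothetical names):

struct ctx;
int  setup_a(struct ctx *c);
int  setup_b(struct ctx *c);            /* requires a */
int  setup_c(struct ctx *c);            /* requires b */
void teardown_a(struct ctx *c);
void teardown_b(struct ctx *c);

static int setup_all(struct ctx *c)
{
        int err;

        err = setup_a(c);
        if (err)
                goto out;
        err = setup_b(c);
        if (err)
                goto undo_a;
        err = setup_c(c);
        if (err)
                goto undo_b;
        return 0;

undo_b:
        teardown_b(c);                  /* reverse order of setup */
undo_a:
        teardown_a(c);
out:
        return err;
}
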
@@ -3089,6 +3091,7 @@ out_fail:
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        lock_kernel();
+       kfree(orig_data);
        return ret;
 }
 
@@ -3380,7 +3383,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        if (!(sb->s_flags & MS_RDONLY))
                es->s_wtime = cpu_to_le32(get_seconds());
        es->s_kbytes_written =
-               cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 
+               cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
        ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
@@ -3485,8 +3488,10 @@ int ext4_force_commit(struct super_block *sb)
                return 0;
 
        journal = EXT4_SB(sb)->s_journal;
-       if (journal)
+       if (journal) {
+               vfs_check_frozen(sb, SB_FREEZE_WRITE);
                ret = ext4_journal_force_commit(journal);
+       }
 
        return ret;
 }
@@ -3535,18 +3540,16 @@ static int ext4_freeze(struct super_block *sb)
         * the journal.
         */
        error = jbd2_journal_flush(journal);
-       if (error < 0) {
-       out:
-               jbd2_journal_unlock_updates(journal);
-               return error;
-       }
+       if (error < 0)
+               goto out;
 
        /* Journal blocked and flushed, clear needs_recovery flag. */
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
-       if (error)
-               goto out;
-       return 0;
+out:
+       /* we rely on s_frozen to stop further updates */
+       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+       return error;
 }
 
 /*
@@ -3563,7 +3566,6 @@ static int ext4_unfreeze(struct super_block *sb)
        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        ext4_commit_super(sb, 1);
        unlock_super(sb);
-       jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        return 0;
 }
 
@@ -3580,6 +3582,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #ifdef CONFIG_QUOTA
        int i;
 #endif
+       char *orig_data = kstrdup(data, GFP_KERNEL);
 
        lock_kernel();
 
@@ -3713,6 +3716,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
        unlock_super(sb);
        unlock_kernel();
+
+       ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
+       kfree(orig_data);
        return 0;
 
 restore_opts:
@@ -3734,6 +3740,7 @@ restore_opts:
 #endif
        unlock_super(sb);
        unlock_kernel();
+       kfree(orig_data);
        return err;
 }
 
@@ -4141,6 +4148,7 @@ static int __init init_ext4_fs(void)
 {
        int err;
 
+       ext4_check_flag_values();
        err = init_ext4_system_zone();
        if (err)
                return err;
index 00740cb..ed9354a 100644 (file)
@@ -34,6 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
+       .setattr        = ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
@@ -45,6 +46,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
 const struct inode_operations ext4_fast_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = ext4_follow_link,
+       .setattr        = ext4_setattr,
 #ifdef CONFIG_EXT4_FS_XATTR
        .setxattr       = generic_setxattr,
        .getxattr       = generic_getxattr,
index 2de0e95..0433800 100644 (file)
@@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
 bad_block:
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
        if (ext4_xattr_check_block(bh)) {
-               ext4_error(inode->i_sb,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
@@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
                if (ext4_xattr_check_block(bs->bh)) {
-                       ext4_error(sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@ -820,7 +818,7 @@ inserted:
                                                EXT4_I(inode)->i_block_group);
 
                        /* non-extent files can't have physical blocks past 2^32 */
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
                        block = ext4_new_meta_blocks(handle, inode,
@@ -828,7 +826,7 @@ inserted:
                        if (error)
                                goto cleanup;
 
-                       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                                BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
 
                        ea_idebug(inode, "creating block %d", block);
@@ -880,8 +878,8 @@ cleanup_dquot:
        goto cleanup;
 
 bad_block:
-       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+       EXT4_ERROR_INODE(inode, "bad block %llu",
+                        EXT4_I(inode)->i_file_acl);
        goto cleanup;
 
 #undef header
@@ -1194,8 +1192,8 @@ retry:
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
+                       EXT4_ERROR_INODE(inode, "bad block %llu",
+                                        EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-               ext4_error(inode->i_sb, "inode %lu: block %llu read error",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "block %llu read error",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext4_error(inode->i_sb, "inode %lu: bad block %llu",
-                          inode->i_ino, EXT4_I(inode)->i_file_acl);
+               EXT4_ERROR_INODE(inode, "bad block %llu",
+                                EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        ext4_xattr_release_block(handle, inode, bh);
@@ -1504,9 +1502,8 @@ again:
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext4_error(inode->i_sb,
-                               "inode %lu: block %lu read error",
-                               inode->i_ino, (unsigned long) ce->e_block);
+                       EXT4_ERROR_INODE(inode, "block %lu read error",
+                                        (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
                                EXT4_XATTR_REFCOUNT_MAX) {
                        ea_idebug(inode, "block %lu refcount %d>=%d",
index aee049c..0ec7bb2 100644 (file)
@@ -57,6 +57,8 @@ const struct inode_operations vxfs_dir_inode_ops = {
 };
 
 const struct file_operations vxfs_dir_operations = {
+       .llseek =               generic_file_llseek,
+       .read =                 generic_read_dir,
        .readdir =              vxfs_readdir,
 };
 
index b9ab69b..e0aca9a 100644 (file)
@@ -272,6 +272,7 @@ static int isofs_readdir(struct file *filp,
 
 const struct file_operations isofs_dir_operations =
 {
+       .llseek = generic_file_llseek,
        .read = generic_read_dir,
        .readdir = isofs_readdir,
 };
index bfc70f5..e214d68 100644 (file)
@@ -1311,7 +1311,6 @@ int jbd2_journal_stop(handle_t *handle)
        if (handle->h_sync)
                transaction->t_synchronous_commit = 1;
        current->journal_info = NULL;
-       spin_lock(&journal->j_state_lock);
        spin_lock(&transaction->t_handle_lock);
        transaction->t_outstanding_credits -= handle->h_buffer_credits;
        transaction->t_updates--;
@@ -1340,8 +1339,7 @@ int jbd2_journal_stop(handle_t *handle)
                jbd_debug(2, "transaction too old, requesting commit for "
                                        "handle %p\n", handle);
                /* This is non-blocking */
-               __jbd2_log_start_commit(journal, transaction->t_tid);
-               spin_unlock(&journal->j_state_lock);
+               jbd2_log_start_commit(journal, transaction->t_tid);
 
                /*
                 * Special case: JBD2_SYNC synchronous updates require us
@@ -1351,7 +1349,6 @@ int jbd2_journal_stop(handle_t *handle)
                        err = jbd2_log_wait_commit(journal, tid);
        } else {
                spin_unlock(&transaction->t_handle_lock);
-               spin_unlock(&journal->j_state_lock);
        }
 
        lock_map_release(&handle->h_lockdep_map);
index 92dde6f..9578cbe 100644 (file)
@@ -49,6 +49,7 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *);
                      
 const struct file_operations ncp_dir_operations =
 {
+       .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = ncp_readdir,
        .unlocked_ioctl = ncp_ioctl,
index ee9a179..db64854 100644 (file)
@@ -1741,6 +1741,7 @@ remove_lru_entry:
                        clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
                        smp_mb__after_clear_bit();
                }
+               spin_unlock(&inode->i_lock);
        }
        spin_unlock(&nfs_access_lru_lock);
        nfs_access_free_list(&head);
index 3aea3ca..91679e2 100644 (file)
@@ -1386,7 +1386,7 @@ static int nfs_commit_inode(struct inode *inode, int how)
        int res = 0;
 
        if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
-               goto out;
+               goto out_mark_dirty;
        spin_lock(&inode->i_lock);
        res = nfs_scan_commit(inode, &head, 0, 0);
        spin_unlock(&inode->i_lock);
@@ -1398,9 +1398,18 @@ static int nfs_commit_inode(struct inode *inode, int how)
                        wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
                                        nfs_wait_bit_killable,
                                        TASK_KILLABLE);
+               else
+                       goto out_mark_dirty;
        } else
                nfs_commit_clear_lock(NFS_I(inode));
-out:
+       return res;
+       /* Note: If we exit without ensuring that the commit is complete,
+        * we must mark the inode as dirty. Otherwise, future calls to
+        * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
+        * that the data is on the disk.
+        */
+out_mark_dirty:
+       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
        return res;
 }
 
@@ -1509,14 +1518,17 @@ int nfs_wb_page(struct inode *inode, struct page *page)
        };
        int ret;
 
-       while(PagePrivate(page)) {
+       for (;;) {
                wait_on_page_writeback(page);
                if (clear_page_dirty_for_io(page)) {
                        ret = nfs_writepage_locked(page, &wbc);
                        if (ret < 0)
                                goto out_error;
+                       continue;
                }
-               ret = sync_inode(inode, &wbc);
+               if (!PagePrivate(page))
+                       break;
+               ret = nfs_commit_inode(inode, FLUSH_SYNC);
                if (ret < 0)
                        goto out_error;
        }
index 885ab55..9b58d38 100644 (file)
@@ -267,7 +267,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
                shpending = p->signal->shared_pending.signal;
                blocked = p->blocked;
                collect_sigign_sigcatch(p, &ignored, &caught);
-               num_threads = atomic_read(&p->signal->count);
+               num_threads = get_nr_threads(p);
                rcu_read_lock();  /* FIXME: is this correct? */
                qsize = atomic_read(&__task_cred(p)->user->sigpending);
                rcu_read_unlock();
@@ -410,7 +410,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                        tty_nr = new_encode_dev(tty_devnum(sig->tty));
                }
 
-               num_threads = atomic_read(&sig->count);
+               num_threads = get_nr_threads(task);
                collect_sigign_sigcatch(task, &sigign, &sigcatch);
 
                cmin_flt = sig->cmin_flt;
index c7f9f23..acb7ef8 100644 (file)
@@ -166,18 +166,6 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root)
        return result;
 }
 
-static int get_nr_threads(struct task_struct *tsk)
-{
-       unsigned long flags;
-       int count = 0;
-
-       if (lock_task_sighand(tsk, &flags)) {
-               count = atomic_read(&tsk->signal->count);
-               unlock_task_sighand(tsk, &flags);
-       }
-       return count;
-}
-
 static int proc_cwd_link(struct inode *inode, struct path *path)
 {
        struct task_struct *task = get_proc_task(inode);
@@ -2444,7 +2432,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
        const struct pid_entry *p = ptr;
        struct inode *inode;
        struct proc_inode *ei;
-       struct dentry *error = ERR_PTR(-EINVAL);
+       struct dentry *error;
 
        /* Allocate the inode */
        error = ERR_PTR(-ENOMEM);
@@ -2794,7 +2782,7 @@ out:
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
-       struct dentry *result = ERR_PTR(-ENOENT);
+       struct dentry *result;
        struct task_struct *task;
        unsigned tgid;
        struct pid_namespace *ns;
index 43c1274..2791907 100644 (file)
@@ -343,21 +343,6 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 /*
  * Return an inode number between PROC_DYNAMIC_FIRST and
  * 0xffffffff, or zero on failure.
- *
- * Current inode allocations in the proc-fs (hex-numbers):
- *
- * 00000000            reserved
- * 00000001-00000fff   static entries  (goners)
- *      001            root-ino
- *
- * 00001000-00001fff   unused
- * 0001xxxx-7fffxxxx   pid-dir entries for pid 1-7fff
- * 80000000-efffffff   unused
- * f0000000-ffffffff   dynamic entries
- *
- * Goal:
- *     Once we split the thing into several virtual filesystems,
- *     we will get rid of magical ranges (and this comment, BTW).
  */
 static unsigned int get_inode_number(void)
 {
index c837a77..6f37c39 100644 (file)
@@ -588,7 +588,7 @@ static struct kcore_list kcore_text;
  */
 static void __init proc_kcore_text_init(void)
 {
-       kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT);
+       kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
 }
 #else
 static void __init proc_kcore_text_init(void)
index 757c069..4258384 100644 (file)
@@ -110,7 +110,6 @@ void __init proc_root_init(void)
        if (err)
                return;
        proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
-       err = PTR_ERR(proc_mnt);
        if (IS_ERR(proc_mnt)) {
                unregister_filesystem(&proc_fs_type);
                return;
index 6f30c3d..3d3fd46 100644 (file)
@@ -77,6 +77,7 @@ out:
 
 const struct file_operations qnx4_dir_operations =
 {
+       .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = qnx4_readdir,
        .fsync          = simple_fsync,
index 655a4c5..1ad8bf0 100644 (file)
@@ -1514,11 +1514,13 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 /*
  * This operation can block, but only after everything is updated
  */
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
        int cnt, ret = 0;
        char warntype[MAXQUOTAS];
+       int warn = flags & DQUOT_SPACE_WARN;
+       int reserve = flags & DQUOT_SPACE_RESERVE;
+       int nofail = flags & DQUOT_SPACE_NOFAIL;
 
        /*
         * First test before acquiring mutex - solves deadlocks when we
@@ -1539,7 +1541,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
                        continue;
                ret = check_bdq(inode->i_dquot[cnt], number, !warn,
                                warntype+cnt);
-               if (ret) {
+               if (ret && !nofail) {
                        spin_unlock(&dq_data_lock);
                        goto out_flush_warn;
                }
@@ -1638,10 +1640,11 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
 /*
  * This operation can block, but only after everything is updated
  */
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 {
        unsigned int cnt;
        char warntype[MAXQUOTAS];
+       int reserve = flags & DQUOT_SPACE_RESERVE;
 
        /* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
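
Folding the separate warn/reserve ints into one flags word lets the new DQUOT_SPACE_NOFAIL behaviour ride along without changing every caller's signature again; each flag is unpacked once at the top of the function. A toy sketch of the pattern (names and logic hypothetical):

#define SPACE_WARN      0x1
#define SPACE_RESERVE   0x2
#define SPACE_NOFAIL    0x4

int  limit_exceeded(unsigned long number);      /* hypothetical check */
void warn_user(void);
void charge(unsigned long number, int reserve);

static int alloc_space(unsigned long number, unsigned int flags)
{
        if (limit_exceeded(number) && !(flags & SPACE_NOFAIL)) {
                if (flags & SPACE_WARN)
                        warn_user();
                return -1;                      /* would be -EDQUOT */
        }
        charge(number, flags & SPACE_RESERVE);
        return 0;
}
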
index 113386d..9c04852 100644 (file)
@@ -97,6 +97,23 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
 }
 EXPORT_SYMBOL(generic_file_llseek);
 
+/**
+ * noop_llseek - No Operation Performed llseek implementation
+ * @file:      file structure to seek on
+ * @offset:    file offset to seek to
+ * @origin:    type of seek
+ *
+ * This is an implementation of ->llseek usable for the rare special case when
+ * userspace expects the seek to succeed but the (device) file is actually not
+ * able to perform the seek. In this case you use noop_llseek() instead of
+ * falling back to the default implementation of ->llseek.
+ */
+loff_t noop_llseek(struct file *file, loff_t offset, int origin)
+{
+       return file->f_pos;
+}
+EXPORT_SYMBOL(noop_llseek);
+
 loff_t no_llseek(struct file *file, loff_t offset, int origin)
 {
        return -ESPIPE;
index 0793044..4455fbe 100644 (file)
@@ -18,6 +18,7 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
                              int datasync);
 
 const struct file_operations reiserfs_dir_operations = {
+       .llseek = generic_file_llseek,
        .read = generic_read_dir,
        .readdir = reiserfs_readdir,
        .fsync = reiserfs_dir_fsync,
index 6c97842..00a70ca 100644 (file)
@@ -37,6 +37,7 @@ static int smb_link(struct dentry *, struct inode *, struct dentry *);
 
 const struct file_operations smb_dir_operations =
 {
+       .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = smb_readdir,
        .unlocked_ioctl = smb_ioctl,
index 3a84455..1660c81 100644 (file)
@@ -207,6 +207,7 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 /* readdir and lookup functions */
 const struct file_operations udf_dir_operations = {
+       .llseek                 = generic_file_llseek,
        .read                   = generic_read_dir,
        .readdir                = udf_readdir,
        .unlocked_ioctl         = udf_ioctl,
index 14743d9..ad9bc1e 100644 (file)
@@ -918,6 +918,7 @@ again:
        sbi->s_bytesex = BYTESEX_LE;
        switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) {
                case UFS_MAGIC:
+               case UFS_MAGIC_BW:
                case UFS2_MAGIC:
                case UFS_MAGIC_LFN:
                case UFS_MAGIC_FEA:
@@ -927,6 +928,7 @@ again:
        sbi->s_bytesex = BYTESEX_BE;
        switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) {
                case UFS_MAGIC:
+               case UFS_MAGIC_BW:
                case UFS2_MAGIC:
                case UFS_MAGIC_LFN:
                case UFS_MAGIC_FEA:
index 6943ec6..8aba544 100644 (file)
@@ -48,6 +48,7 @@ typedef __u16 __bitwise __fs16;
 #define UFS_SECTOR_SIZE 512
 #define UFS_SECTOR_BITS 9
 #define UFS_MAGIC  0x00011954
+#define UFS_MAGIC_BW 0x0f242697
 #define UFS2_MAGIC 0x19540119
 #define UFS_CIGAM  0x54190100 /* byteswapped MAGIC */
 
index 6920695..0c80bb3 100644 (file)
@@ -123,15 +123,7 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev,
                                                 size_t size,
                                                 enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
-
-       BUG_ON(!valid_dma_direction(dir));
-       if (ops->sync_single_range_for_cpu) {
-               ops->sync_single_range_for_cpu(dev, addr, offset, size, dir);
-               debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir);
-
-       } else
-               dma_sync_single_for_cpu(dev, addr + offset, size, dir);
+       dma_sync_single_for_cpu(dev, addr + offset, size, dir);
 }
 
 static inline void dma_sync_single_range_for_device(struct device *dev,
@@ -140,15 +132,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev,
                                                    size_t size,
                                                    enum dma_data_direction dir)
 {
-       struct dma_map_ops *ops = get_dma_ops(dev);
-
-       BUG_ON(!valid_dma_direction(dir));
-       if (ops->sync_single_range_for_device) {
-               ops->sync_single_range_for_device(dev, addr, offset, size, dir);
-               debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir);
-
-       } else
-               dma_sync_single_for_device(dev, addr + offset, size, dir);
+       dma_sync_single_for_device(dev, addr + offset, size, dir);
 }
 
 static inline void
index 979c6a5..4f3d75e 100644 (file)
@@ -60,7 +60,9 @@ struct module;
  * @names: if set, must be an array of strings to use as alternative
  *      names for the GPIOs in this chip. Any entry in the array
  *      may be NULL if there is no alias for the GPIO, however the
- *      array must be @ngpio entries long.
+ *      array must be @ngpio entries long.  A name can include a single printk
+ *      format specifier for an unsigned int.  It is substituted by the actual
+ *      number of the gpio.
  *
  * A gpio_chip can help platforms abstract various sources of GPIOs so
  * they can all be accessed through a common programming interface.
@@ -88,6 +90,9 @@ struct gpio_chip {
                                                unsigned offset);
        int                     (*direction_output)(struct gpio_chip *chip,
                                                unsigned offset, int value);
+       int                     (*set_debounce)(struct gpio_chip *chip,
+                                               unsigned offset, unsigned debounce);
+
        void                    (*set)(struct gpio_chip *chip,
                                                unsigned offset, int value);
 
@@ -98,7 +103,7 @@ struct gpio_chip {
                                                struct gpio_chip *chip);
        int                     base;
        u16                     ngpio;
-       char                    **names;
+       const char              *const *names;
        unsigned                can_sleep:1;
        unsigned                exported:1;
 };
@@ -121,6 +126,8 @@ extern void gpio_free(unsigned gpio);
 extern int gpio_direction_input(unsigned gpio);
 extern int gpio_direction_output(unsigned gpio, int value);
 
+extern int gpio_set_debounce(unsigned gpio, unsigned debounce);
+
 extern int gpio_get_value_cansleep(unsigned gpio);
 extern void gpio_set_value_cansleep(unsigned gpio, int value);
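
A usage sketch for the new debounce hook (GPIO number, label, and interval are invented; gpio_set_debounce() fails on controllers whose gpio_chip lacks a set_debounce callback, in which case software debouncing would be needed):

static int example_button_init(void)
{
        int err;

        err = gpio_request(42, "example-button");
        if (err)
                return err;
        err = gpio_direction_input(42);
        if (err)
                goto out_free;
        if (gpio_set_debounce(42, 5000))        /* debounce time, microseconds */
                pr_info("example-button: no hardware debounce\n");
        return 0;

out_free:
        gpio_free(42);
        return err;
}
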
 
index 8b94544..5de0735 100644 (file)
@@ -11,7 +11,9 @@ struct scatterlist {
        unsigned int    offset;
        unsigned int    length;
        dma_addr_t      dma_address;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
        unsigned int    dma_length;
+#endif
 };
 
 /*
@@ -22,22 +24,11 @@ struct scatterlist {
  * is 0.
  */
 #define sg_dma_address(sg)     ((sg)->dma_address)
-#ifndef sg_dma_len
-/*
- * Normally, you have an iommu on 64 bit machines, but not on 32 bit
- * machines. Architectures that are differnt should override this.
- */
-#if __BITS_PER_LONG == 64
+
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
 #define sg_dma_len(sg)         ((sg)->dma_length)
 #else
 #define sg_dma_len(sg)         ((sg)->length)
-#endif /* 64 bit */
-#endif /* sg_dma_len */
-
-#ifndef ISA_DMA_THRESHOLD
-#define ISA_DMA_THRESHOLD      (~0UL)
 #endif
 
-#define ARCH_HAS_SG_CHAIN
-
 #endif /* __ASM_GENERIC_SCATTERLIST_H */
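
sg_dma_len() now keys off CONFIG_NEED_SG_DMA_LENGTH instead of guessing from __BITS_PER_LONG. A sketch of the usual consumer pattern, which is unaffected either way; the device and descriptor details are hypothetical:

static void program_sg(struct device *dev, struct scatterlist *sgl,
                       int nents)
{
        struct scatterlist *sg;
        int i, count;

        count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
        for_each_sg(sgl, sg, count, i) {
                dma_addr_t addr = sg_dma_address(sg);
                unsigned int len = sg_dma_len(sg); /* dma_length or length */

                /* write addr/len into a hardware descriptor here */
        }
}
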
index 510df36..fd60700 100644 (file)
@@ -34,6 +34,9 @@
 #ifndef cpu_to_node
 #define cpu_to_node(cpu)       ((void)(cpu),0)
 #endif
+#ifndef cpu_to_mem
+#define cpu_to_mem(cpu)                ((void)(cpu),0)
+#endif
 #ifndef parent_node
 #define parent_node(node)      ((void)(node),0)
 #endif
index 67e6520..ef779c6 100644 (file)
        }                                                               \
                                                                        \
        /* RapidIO route ops */                                         \
-       .rio_route        : AT(ADDR(.rio_route) - LOAD_OFFSET) {        \
-               VMLINUX_SYMBOL(__start_rio_route_ops) = .;              \
-               *(.rio_route_ops)                                       \
-               VMLINUX_SYMBOL(__end_rio_route_ops) = .;                \
+       .rio_ops        : AT(ADDR(.rio_ops) - LOAD_OFFSET) {            \
+               VMLINUX_SYMBOL(__start_rio_switch_ops) = .;             \
+               *(.rio_switch_ops)                                      \
+               VMLINUX_SYMBOL(__end_rio_switch_ops) = .;               \
        }                                                               \
                                                                        \
        TRACEDATA                                                       \
index 811dbb3..7a8db41 100644 (file)
@@ -212,6 +212,8 @@ extern void kick_iocb(struct kiocb *iocb);
 extern int aio_complete(struct kiocb *iocb, long res, long res2);
 struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
+extern long do_io_submit(aio_context_t ctx_id, long nr,
+                        struct iocb __user *__user *iocbpp, bool compat);
 #else
 static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
 static inline int aio_put_req(struct kiocb *iocb) { return 0; }
@@ -219,6 +221,9 @@ static inline void kick_iocb(struct kiocb *iocb) { }
 static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; }
 struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
+static inline long do_io_submit(aio_context_t ctx_id, long nr,
+                               struct iocb __user * __user *iocbpp,
+                               bool compat) { return 0; }
 #endif /* CONFIG_AIO */
 
 static inline struct kiocb *list_kiocb(struct list_head *h)
index daf8c48..6fb2720 100644 (file)
@@ -141,6 +141,7 @@ extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order);
 extern void bitmap_release_region(unsigned long *bitmap, int pos, int order);
 extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order);
 extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
+extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits);
 
 #define BITMAP_LAST_WORD_MASK(nbits)                                   \
 (                                                                      \
index 8f78073..0c62160 100644 (file)
@@ -397,7 +397,7 @@ struct cftype {
         * This callback must be implemented if you want to provide
         * notification functionality.
         */
-       int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
+       void (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
                        struct eventfd_ctx *eventfd);
 };
 
index 717c691..168f7da 100644 (file)
@@ -356,5 +356,9 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
 asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
                                  int flags, int mode);
 
+extern ssize_t compat_rw_copy_check_uvector(int type,
+               const struct compat_iovec __user *uvector, unsigned long nr_segs,
+               unsigned long fast_segs, struct iovec *fast_pointer,
+               struct iovec **ret_pointer);
 #endif /* CONFIG_COMPAT */
 #endif /* _LINUX_COMPAT_H */
index 20b51ca..457ed76 100644 (file)
@@ -69,6 +69,7 @@ extern void cpuset_task_status_allowed(struct seq_file *m,
                                        struct task_struct *task);
 
 extern int cpuset_mem_spread_node(void);
+extern int cpuset_slab_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
 {
@@ -194,6 +195,11 @@ static inline int cpuset_mem_spread_node(void)
        return 0;
 }
 
+static inline int cpuset_slab_spread_node(void)
+{
+       return 0;
+}
+
 static inline int cpuset_do_page_mem_spread(void)
 {
        return 0;
index 52507c3..75c0fa8 100644 (file)
@@ -156,7 +156,6 @@ extern int copy_creds(struct task_struct *, unsigned long);
 extern struct cred *cred_alloc_blank(void);
 extern struct cred *prepare_creds(void);
 extern struct cred *prepare_exec_creds(void);
-extern struct cred *prepare_usermodehelper_creds(void);
 extern int commit_creds(struct cred *);
 extern void abort_creds(struct cred *);
 extern const struct cred *override_creds(const struct cred *);
index ca32ed7..89b7e1a 100644 (file)
@@ -40,16 +40,6 @@ struct dma_map_ops {
        void (*sync_single_for_device)(struct device *dev,
                                       dma_addr_t dma_handle, size_t size,
                                       enum dma_data_direction dir);
-       void (*sync_single_range_for_cpu)(struct device *dev,
-                                         dma_addr_t dma_handle,
-                                         unsigned long offset,
-                                         size_t size,
-                                         enum dma_data_direction dir);
-       void (*sync_single_range_for_device)(struct device *dev,
-                                            dma_addr_t dma_handle,
-                                            unsigned long offset,
-                                            size_t size,
-                                            enum dma_data_direction dir);
        void (*sync_sg_for_cpu)(struct device *dev,
                                struct scatterlist *sg, int nents,
                                enum dma_data_direction dir);
@@ -105,21 +95,6 @@ static inline int is_device_dma_capable(struct device *dev)
 #include <asm-generic/dma-mapping-broken.h>
 #endif
 
-/* for backwards compatibility, removed soon */
-static inline void __deprecated dma_sync_single(struct device *dev,
-                                               dma_addr_t addr, size_t size,
-                                               enum dma_data_direction dir)
-{
-       dma_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static inline void __deprecated dma_sync_sg(struct device *dev,
-                                           struct scatterlist *sg, int nelems,
-                                           enum dma_data_direction dir)
-{
-       dma_sync_sg_for_cpu(dev, sg, nelems, dir);
-}
-
 static inline u64 dma_get_mask(struct device *dev)
 {
        if (dev && dev->dma_mask && *dev->dma_mask)
index 4bd94bf..72e2b8a 100644 (file)
 #define CSR_DESCRIPTOR         0x01
 #define CSR_VENDOR             0x03
 #define CSR_HARDWARE_VERSION   0x04
-#define CSR_NODE_CAPABILITIES  0x0c
 #define CSR_UNIT               0x11
 #define CSR_SPECIFIER_ID       0x12
 #define CSR_VERSION            0x13
 #define CSR_DEPENDENT_INFO     0x14
 #define CSR_MODEL              0x17
-#define CSR_INSTANCE           0x18
 #define CSR_DIRECTORY_ID       0x20
 
 struct fw_csr_iterator {
@@ -89,7 +87,6 @@ struct fw_card {
        int current_tlabel;
        u64 tlabel_mask;
        struct list_head transaction_list;
-       struct timer_list flush_timer;
        unsigned long reset_jiffies;
 
        unsigned long long guid;
@@ -290,6 +287,8 @@ struct fw_transaction {
        int tlabel;
        int timestamp;
        struct list_head link;
+       struct fw_card *card;
+       struct timer_list split_timeout_timer;
 
        struct fw_packet packet;
 
index b336cb9..85e823a 100644 (file)
@@ -2228,6 +2228,7 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
 extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset,
@@ -2250,10 +2251,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
 #endif
 
 #ifdef CONFIG_BLOCK
+struct bio;
+typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
+                           loff_t file_offset);
+void dio_end_io(struct bio *bio, int error);
+
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct block_device *bdev, const struct iovec *iov, loff_t offset,
        unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-       int lock_type);
+       dio_submit_t submit_io, int lock_type);
 
 enum {
        /* need locking between buffered and direct access */
@@ -2269,7 +2275,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
        dio_iodone_t end_io)
 {
        return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-                                   nr_segs, get_block, end_io,
+                                   nr_segs, get_block, end_io, NULL,
                                    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 
@@ -2279,7 +2285,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
        dio_iodone_t end_io)
 {
        return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-                               nr_segs, get_block, end_io, 0);
+                                   nr_segs, get_block, end_io, NULL, 0);
 }
 #endif
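
The extra dio_submit_t argument lets a filesystem take over bio submission for direct I/O; the wrappers above pass NULL to keep the stock submit_bio() behaviour. A hypothetical hook, sketching the shape only:

static void myfs_submit_dio(int rw, struct bio *bio, struct inode *inode,
                            loff_t file_offset)
{
        /* remap, checksum, or account the bio here, then send it down */
        submit_bio(rw, bio);
}
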
 
index 4e949a5..03f616b 100644 (file)
@@ -51,6 +51,11 @@ static inline int gpio_direction_output(unsigned gpio, int value)
        return -ENOSYS;
 }
 
+static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+       return -ENOSYS;
+}
+
 static inline int gpio_get_value(unsigned gpio)
 {
        /* GPIO can never have been requested or set as {in,out}put */
index e103366..c04bac8 100644 (file)
@@ -7,6 +7,9 @@ struct max732x_platform_data {
        /* number of the first GPIO */
        unsigned        gpio_base;
 
+       /* interrupt base */
+       int             irq_base;
+
        void            *context;       /* param to setup/teardown */
 
        int             (*setup)(struct i2c_client *client,
index d5c5a60..139ba52 100644 (file)
@@ -24,7 +24,7 @@ struct pca953x_platform_data {
        int             (*teardown)(struct i2c_client *client,
                                unsigned gpio, unsigned ngpio,
                                void *context);
-       char            **names;
+       const char      *const *names;
 };
 
 #endif /* _LINUX_PCA953X_H */
index 7996fc2..2beaa13 100644 (file)
@@ -16,7 +16,7 @@ extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
 #define INIT_SIGNALS(sig) {                                            \
-       .count          = ATOMIC_INIT(1),                               \
+       .nr_threads     = 1,                                            \
        .wait_chldexit  = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
        .shared_pending = {                                             \
                .list = LIST_HEAD_INIT(sig.shared_pending.list),        \
@@ -35,7 +35,7 @@ extern struct nsproxy init_nsproxy;
 
 #define INIT_SIGHAND(sighand) {                                                \
        .count          = ATOMIC_INIT(1),                               \
-       .action         = { { { .sa_handler = NULL, } }, },             \
+       .action         = { { { .sa_handler = SIG_DFL, } }, },          \
        .siglock        = __SPIN_LOCK_UNLOCKED(sighand.siglock),        \
        .signalfd_wqh   = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh),  \
 }
@@ -45,9 +45,9 @@ extern struct group_info init_groups;
 #define INIT_STRUCT_PID {                                              \
        .count          = ATOMIC_INIT(1),                               \
        .tasks          = {                                             \
-               { .first = &init_task.pids[PIDTYPE_PID].node },         \
-               { .first = &init_task.pids[PIDTYPE_PGID].node },        \
-               { .first = &init_task.pids[PIDTYPE_SID].node },         \
+               { .first = NULL },                                      \
+               { .first = NULL },                                      \
+               { .first = NULL },                                      \
        },                                                              \
        .level          = 0,                                            \
        .numbers        = { {                                           \
@@ -61,7 +61,7 @@ extern struct group_info init_groups;
 {                                                              \
        .node = {                                               \
                .next = NULL,                                   \
-               .pprev = &init_struct_pid.tasks[type].first,    \
+               .pprev = NULL,                                  \
        },                                                      \
        .pid = &init_struct_pid,                                \
 }
@@ -163,6 +163,7 @@ extern struct cred init_cred;
                [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),           \
                [PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),            \
        },                                                              \
+       .thread_group   = LIST_HEAD_INIT(tsk.thread_group),             \
        .dirties = INIT_PROP_LOCAL_SINGLE(dirties),                     \
        INIT_IDS                                                        \
        INIT_PERF_EVENTS(tsk)                                           \
index 83524e4..6fcc910 100644 (file)
@@ -1155,7 +1155,7 @@ struct input_dev {
 
        int sync;
 
-       int abs[ABS_MAX + 1];
+       int abs[ABS_CNT];
        int rep[REP_MAX + 1];
 
        unsigned long key[BITS_TO_LONGS(KEY_CNT)];
@@ -1163,11 +1163,11 @@ struct input_dev {
        unsigned long snd[BITS_TO_LONGS(SND_CNT)];
        unsigned long sw[BITS_TO_LONGS(SW_CNT)];
 
-       int absmax[ABS_MAX + 1];
-       int absmin[ABS_MAX + 1];
-       int absfuzz[ABS_MAX + 1];
-       int absflat[ABS_MAX + 1];
-       int absres[ABS_MAX + 1];
+       int absmax[ABS_CNT];
+       int absmin[ABS_CNT];
+       int absfuzz[ABS_CNT];
+       int absflat[ABS_CNT];
+       int absres[ABS_CNT];
 
        int (*open)(struct input_dev *dev);
        void (*close)(struct input_dev *dev);
index 9e20c29..47199b1 100644 (file)
@@ -64,8 +64,8 @@ struct js_event {
 #define JSIOCSCORR             _IOW('j', 0x21, struct js_corr)                 /* set correction values */
 #define JSIOCGCORR             _IOR('j', 0x22, struct js_corr)                 /* get correction values */
 
-#define JSIOCSAXMAP            _IOW('j', 0x31, __u8[ABS_MAX + 1])              /* set axis mapping */
-#define JSIOCGAXMAP            _IOR('j', 0x32, __u8[ABS_MAX + 1])              /* get axis mapping */
+#define JSIOCSAXMAP            _IOW('j', 0x31, __u8[ABS_CNT])                  /* set axis mapping */
+#define JSIOCGAXMAP            _IOR('j', 0x32, __u8[ABS_CNT])                  /* get axis mapping */
 #define JSIOCSBTNMAP           _IOW('j', 0x33, __u16[KEY_MAX - BTN_MISC + 1])  /* set button mapping */
 #define JSIOCGBTNMAP           _IOR('j', 0x34, __u16[KEY_MAX - BTN_MISC + 1])  /* get button mapping */
 
index facb27f..6efd7a7 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/stddef.h>
 #include <linux/errno.h>
 #include <linux/compiler.h>
+#include <linux/workqueue.h>
 
 #define KMOD_PATH_LEN 256
 
@@ -45,19 +46,6 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS;
 
 struct key;
 struct file;
-struct subprocess_info;
-
-/* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
-                                                 char **envp, gfp_t gfp_mask);
-
-/* Set various pieces of state into the subprocess_info structure */
-void call_usermodehelper_setkeys(struct subprocess_info *info,
-                                struct key *session_keyring);
-int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
-                                 struct file **filp);
-void call_usermodehelper_setcleanup(struct subprocess_info *info,
-                                   void (*cleanup)(char **argv, char **envp));
 
 enum umh_wait {
        UMH_NO_WAIT = -1,       /* don't wait at all */
@@ -65,6 +53,29 @@ enum umh_wait {
        UMH_WAIT_PROC = 1,      /* wait for the process to complete */
 };
 
+struct subprocess_info {
+       struct work_struct work;
+       struct completion *complete;
+       char *path;
+       char **argv;
+       char **envp;
+       enum umh_wait wait;
+       int retval;
+       int (*init)(struct subprocess_info *info);
+       void (*cleanup)(struct subprocess_info *info);
+       void *data;
+};
+
+/* Allocate a subprocess_info structure */
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+                                                 char **envp, gfp_t gfp_mask);
+
+/* Set various pieces of state into the subprocess_info structure */
+void call_usermodehelper_setfns(struct subprocess_info *info,
+                   int (*init)(struct subprocess_info *info),
+                   void (*cleanup)(struct subprocess_info *info),
+                   void *data);
+
 /* Actually execute the sub-process */
 int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
 
@@ -73,38 +84,33 @@ int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
 void call_usermodehelper_freeinfo(struct subprocess_info *info);
 
 static inline int
-call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
+call_usermodehelper_fns(char *path, char **argv, char **envp,
+                       enum umh_wait wait,
+                       int (*init)(struct subprocess_info *info),
+                       void (*cleanup)(struct subprocess_info *), void *data)
 {
        struct subprocess_info *info;
        gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
        info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
        if (info == NULL)
                return -ENOMEM;
+
+       call_usermodehelper_setfns(info, init, cleanup, data);
+
        return call_usermodehelper_exec(info, wait);
 }
 
 static inline int
-call_usermodehelper_keys(char *path, char **argv, char **envp,
-                        struct key *session_keyring, enum umh_wait wait)
+call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
 {
-       struct subprocess_info *info;
-       gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
-
-       info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
-       if (info == NULL)
-               return -ENOMEM;
-
-       call_usermodehelper_setkeys(info, session_keyring);
-       return call_usermodehelper_exec(info, wait);
+       return call_usermodehelper_fns(path, argv, envp, wait,
+                                      NULL, NULL, NULL);
 }
 
 extern void usermodehelper_init(void);
 
-struct file;
-extern int call_usermodehelper_pipe(char *path, char *argv[], char *envp[],
-                                   struct file **filp);
-
 extern int usermodehelper_disable(void);
 extern void usermodehelper_enable(void);
 
index 0589479..9411d32 100644 (file)
@@ -90,7 +90,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
 
 extern int
-mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
+mem_cgroup_prepare_migration(struct page *page,
+       struct page *newpage, struct mem_cgroup **ptr);
 extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
        struct page *oldpage, struct page *newpage);
 
@@ -227,7 +228,8 @@ static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
 }
 
 static inline int
-mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+       struct mem_cgroup **ptr)
 {
        return 0;
 }
index 3196c84..f65913c 100644 (file)
@@ -230,7 +230,7 @@ static inline void *mmc_priv(struct mmc_host *host)
 #define mmc_classdev(x)        (&(x)->class_dev)
 #define mmc_hostname(x)        (dev_name(&(x)->class_dev))
 
-extern int mmc_suspend_host(struct mmc_host *, pm_message_t);
+extern int mmc_suspend_host(struct mmc_host *);
 extern int mmc_resume_host(struct mmc_host *);
 
 extern void mmc_power_save_host(struct mmc_host *host);
diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
new file mode 100644 (file)
index 0000000..9188c97
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * include/linux/mmc/sdhci-spear.h
+ *
+ * SDHCI declarations specific to ST SPEAr platform
+ *
+ * Copyright (C) 2010 ST Microelectronics
+ * Viresh Kumar <viresh.kumar@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef MMC_SDHCI_SPEAR_H
+#define MMC_SDHCI_SPEAR_H
+
+#include <linux/platform_device.h>
+/*
+ * struct sdhci_plat_data: spear sdhci platform data structure
+ *
+ * @card_power_gpio: gpio pin for enabling/disabling power to sdhci socket
+ * @power_active_high: if set, enable power to sdhci socket by setting
+ *                     card_power_gpio
+ * @power_always_enb: If set, then enable power on probe, otherwise enable only
+ *                     on card insertion and disable on card removal.
+ * @card_int_gpio: gpio pin used for card detection
+ */
+struct sdhci_plat_data {
+       int card_power_gpio;
+       int power_active_high;
+       int power_always_enb;
+       int card_int_gpio;
+};
+
+/* This function is used to set platform_data field of pdev->dev */
+static inline void
+sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data)
+{
+       pdev->dev.platform_data = data;
+}
+
+#endif /* MMC_SDHCI_SPEAR_H */
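
A sketch of how board code might use the helper above; the GPIO numbers are hypothetical:

static struct sdhci_plat_data board_sdhci_data = {
        .card_power_gpio        = 24,   /* hypothetical power-enable pin */
        .power_active_high      = 1,
        .card_int_gpio          = 28,   /* hypothetical card-detect pin */
};

static void __init board_sdhci_init(struct platform_device *pdev)
{
        sdhci_set_plat_data(pdev, &board_sdhci_data);
}
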
index c6c0cce..31baaf8 100644 (file)
@@ -145,6 +145,9 @@ extern void sdio_writew(struct sdio_func *func, u16 b,
 extern void sdio_writel(struct sdio_func *func, u32 b,
        unsigned int addr, int *err_ret);
 
+extern u8 sdio_writeb_readb(struct sdio_func *func, u8 write_byte,
+       unsigned int addr, int *err_ret);
+
 extern int sdio_memcpy_toio(struct sdio_func *func, unsigned int addr,
        void *src, int count);
 extern int sdio_writesb(struct sdio_func *func, unsigned int addr,
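
The new helper issues a single read-after-write (RAW) command instead of separate write and read transactions. A hypothetical use, assuming the function is already enabled and MY_CTRL_REG names one of its registers:

static int toggle_ctrl_bit(struct sdio_func *func)
{
        int err;
        u8 val;

        sdio_claim_host(func);
        val = sdio_writeb_readb(func, 0x01, MY_CTRL_REG, &err);
        sdio_release_host(func);

        return err ? err : val;
}
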
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
new file mode 100644 (file)
index 0000000..aafe832
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * include/linux/mmc/sh_mmcif.h
+ *
+ * platform data for eMMC driver
+ *
+ * Copyright (C) 2010 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ */
+
+#ifndef __SH_MMCIF_H__
+#define __SH_MMCIF_H__
+
+/*
+ * MMCIF : CE_CLK_CTRL [19:16]
+ * 1000 : Peripheral clock / 512
+ * 0111 : Peripheral clock / 256
+ * 0110 : Peripheral clock / 128
+ * 0101 : Peripheral clock / 64
+ * 0100 : Peripheral clock / 32
+ * 0011 : Peripheral clock / 16
+ * 0010 : Peripheral clock / 8
+ * 0001 : Peripheral clock / 4
+ * 0000 : Peripheral clock / 2
+ * 1111 : Peripheral clock (sup_pclk set '1')
+ */
+
+struct sh_mmcif_plat_data {
+       void (*set_pwr)(struct platform_device *pdev, int state);
+       void (*down_pwr)(struct platform_device *pdev);
+       u8      sup_pclk;       /* 1: SH7757, 0: SH7724/SH7372 */
+       unsigned long caps;
+       u32     ocr;
+};
+
+#endif /* __SH_MMCIF_H__ */
index 0fa4913..b4d109e 100644 (file)
@@ -671,6 +671,12 @@ void memory_present(int nid, unsigned long start, unsigned long end);
 static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
 #endif
 
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+int local_memory_node(int node_id);
+#else
+static inline int local_memory_node(int node_id) { return node_id; };
+#endif
+
 #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
 unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
 #endif
index dba35e4..8a8f1d0 100644 (file)
@@ -66,6 +66,8 @@
  * int num_online_nodes()              Number of online Nodes
  * int num_possible_nodes()            Number of all possible Nodes
  *
+ * int node_random(mask)               Random node with set bit in mask
+ *
  * int node_online(node)               Is some node online?
  * int node_possible(node)             Is some node possible?
  *
@@ -430,6 +432,10 @@ static inline void node_set_offline(int nid)
        node_clear_state(nid, N_ONLINE);
        nr_online_nodes = num_node_state(N_ONLINE);
 }
+
+#define node_random(mask) __node_random(&(mask))
+extern int __node_random(const nodemask_t *maskp);
+
 #else
 
 static inline int node_state(int node, enum node_states state)
@@ -460,6 +466,8 @@ static inline int num_node_state(enum node_states state)
 
 #define node_set_online(node)     node_set_state((node), N_ONLINE)
 #define node_set_offline(node)    node_clear_state((node), N_ONLINE)
+
+static inline int node_random(const nodemask_t mask) { return 0; }
 #endif
 
 #define node_online_map        node_states[N_ONLINE]
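
A sketch of the new helper; on !CONFIG_NUMA builds it degenerates to node 0. Spreading an allocation over the online nodes, for illustration:

static struct page *alloc_on_random_node(unsigned int order)
{
        int nid = node_random(node_online_map);

        return alloc_pages_node(nid, GFP_KERNEL, order);
}
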
index 7c36096..540703b 100644 (file)
@@ -164,7 +164,10 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
 /* Encapsulate (negative) errno value (in particular, NOTIFY_BAD <=> EPERM). */
 static inline int notifier_from_errno(int err)
 {
-       return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
+       if (err)
+               return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
+
+       return NOTIFY_OK;
 }
 
 /* Restore (negative) errno value from notify return value. */
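
Before this fix, notifier_from_errno(0) produced NOTIFY_STOP_MASK | NOTIFY_OK and so wrongly stopped the chain on success. A hypothetical callback showing the intended pattern (prepare_thing() is illustrative and returns 0 or -errno):

static int my_notifier(struct notifier_block *nb, unsigned long action,
                       void *data)
{
        int err = prepare_thing(data);  /* hypothetical helper */

        return notifier_from_errno(err);
}
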
index aef22ae..5bb13b3 100644 (file)
@@ -40,6 +40,7 @@ enum {
        PCG_USED, /* this object is in use. */
        PCG_ACCT_LRU, /* page has been accounted for */
        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+       PCG_MIGRATION, /* under page migration */
 };
 
 #define TESTPCGFLAG(uname, lname)                      \
@@ -79,6 +80,10 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
 
+SETPCGFLAG(Migration, MIGRATION)
+CLEARPCGFLAG(Migration, MIGRATION)
+TESTPCGFLAG(Migration, MIGRATION)
+
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
        return page_to_nid(pc->page);
index 370abb1..e38ae53 100644 (file)
@@ -9,6 +9,10 @@
 
 #include <linux/fs.h>
 
+#define DQUOT_SPACE_WARN       0x1
+#define DQUOT_SPACE_RESERVE    0x2
+#define DQUOT_SPACE_NOFAIL     0x4
+
 static inline struct quota_info *sb_dqopt(struct super_block *sb)
 {
        return &sb->s_dquot;
@@ -41,9 +45,8 @@ int dquot_scan_active(struct super_block *sb,
 struct dquot *dquot_alloc(struct super_block *sb, int type);
 void dquot_destroy(struct dquot *dquot);
 
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve);
-void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
+void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
 
 int dquot_alloc_inode(const struct inode *inode);
 
@@ -242,17 +245,17 @@ static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
 }
 
 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
-               int warn, int reserve)
+               int flags)
 {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_add_bytes(inode, number);
        return 0;
 }
 
 static inline void __dquot_free_space(struct inode *inode, qsize_t number,
-               int reserve)
+               int flags)
 {
-       if (!reserve)
+       if (!(flags & DQUOT_SPACE_RESERVE))
                inode_sub_bytes(inode, number);
 }
 
@@ -268,7 +271,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 
 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr, 1, 0);
+       return __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN);
+}
+
+static inline void dquot_alloc_space_nofail(struct inode *inode, qsize_t nr)
+{
+       __dquot_alloc_space(inode, nr, DQUOT_SPACE_WARN|DQUOT_SPACE_NOFAIL);
+       mark_inode_dirty(inode);
 }
 
 static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
@@ -286,6 +295,11 @@ static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr)
        return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
+static inline void dquot_alloc_block_nofail(struct inode *inode, qsize_t nr)
+{
+       dquot_alloc_space_nofail(inode, nr << inode->i_blkbits);
+}
+
 static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 {
        return dquot_alloc_space(inode, nr << inode->i_blkbits);
@@ -293,7 +307,7 @@ static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 
 static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0);
 }
 
 static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
@@ -308,7 +322,8 @@ static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
 
 static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
 {
-       return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
+       return __dquot_alloc_space(inode, nr << inode->i_blkbits,
+                               DQUOT_SPACE_WARN|DQUOT_SPACE_RESERVE);
 }
 
 static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
@@ -345,7 +360,7 @@ static inline void dquot_free_block(struct inode *inode, qsize_t nr)
 static inline void dquot_release_reservation_block(struct inode *inode,
                qsize_t nr)
 {
-       __dquot_free_space(inode, nr << inode->i_blkbits, 1);
+       __dquot_free_space(inode, nr << inode->i_blkbits, DQUOT_SPACE_RESERVE);
 }
 
 #endif /* _LINUX_QUOTAOPS_ */
index 25d02fe..fb7ab9d 100644 (file)
@@ -40,6 +40,10 @@ struct rand_pool_info {
        __u32   buf[0];
 };
 
+struct rnd_state {
+       __u32 s1, s2, s3;
+};
+
 /* Exported functions */
 
 #ifdef __KERNEL__
@@ -74,6 +78,30 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l
 u32 random32(void);
 void srandom32(u32 seed);
 
+u32 prandom32(struct rnd_state *);
+
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
+{
+       return (x < m) ? x + m : x;
+}
+
+/**
+ * prandom32_seed - set seed for prandom32().
+ * @state: pointer to state structure to receive the seed.
+ * @seed: arbitrary 64-bit value to use as a seed.
+ */
+static inline void prandom32_seed(struct rnd_state *state, u64 seed)
+{
+       u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
+
+       state->s1 = __seed(i, 1);
+       state->s2 = __seed(i, 7);
+       state->s3 = __seed(i, 15);
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_RANDOM_H */
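
The new prandom32()/prandom32_seed() pair gives a caller its own Tausworthe generator whose sequence it controls, unlike the shared random32() state. A small sketch with hypothetical names:

static struct rnd_state backoff_state;

static void backoff_init(void)
{
        prandom32_seed(&backoff_state, get_jiffies_64());
}

static u32 backoff_slot(void)
{
        return prandom32(&backoff_state) % 1024;        /* slot 0..1023 */
}
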
index dc0c755..19b5f22 100644 (file)
 #define RIO_INB_MBOX_RESOURCE  1
 #define RIO_OUTB_MBOX_RESOURCE 2
 
+#define RIO_PW_MSG_SIZE                64
+
 extern struct bus_type rio_bus_type;
 extern struct list_head rio_devices;   /* list of all devices */
 
 struct rio_mport;
+union rio_pw_msg;
 
 /**
  * struct rio_dev - RIO device info
@@ -107,11 +110,15 @@ struct rio_dev {
        u32 swpinfo;            /* Only used for switches */
        u32 src_ops;
        u32 dst_ops;
+       u32 comp_tag;
+       u32 phys_efptr;
+       u32 em_efptr;
        u64 dma_mask;
        struct rio_switch *rswitch;     /* RIO switch info */
        struct rio_driver *driver;      /* RIO driver claiming this device */
        struct device dev;      /* LDM device structure */
        struct resource riores[RIO_MAX_DEV_RESOURCES];
+       int (*pwcback) (struct rio_dev *rdev, union rio_pw_msg *msg, int step);
        u16 destid;
 };
 
@@ -211,8 +218,12 @@ struct rio_net {
  * @hopcount: Hopcount to this switch
  * @destid: Associated destid in the path
  * @route_table: Copy of switch routing table
+ * @port_ok: Status of each port (one bit per port) - OK=1 or UNINIT=0
  * @add_entry: Callback for switch-specific route add function
  * @get_entry: Callback for switch-specific route get function
+ * @clr_table: Callback for switch-specific clear route table function
+ * @em_init: Callback for switch-specific error management initialization function
+ * @em_handle: Callback for switch-specific error management handler function
  */
 struct rio_switch {
        struct list_head node;
@@ -220,10 +231,19 @@ struct rio_switch {
        u16 hopcount;
        u16 destid;
        u8 *route_table;
+       u32 port_ok;
        int (*add_entry) (struct rio_mport * mport, u16 destid, u8 hopcount,
                          u16 table, u16 route_destid, u8 route_port);
        int (*get_entry) (struct rio_mport * mport, u16 destid, u8 hopcount,
                          u16 table, u16 route_destid, u8 * route_port);
+       int (*clr_table) (struct rio_mport *mport, u16 destid, u8 hopcount,
+                         u16 table);
+       int (*set_domain) (struct rio_mport *mport, u16 destid, u8 hopcount,
+                          u8 sw_domain);
+       int (*get_domain) (struct rio_mport *mport, u16 destid, u8 hopcount,
+                          u8 *sw_domain);
+       int (*em_init) (struct rio_dev *dev);
+       int (*em_handle) (struct rio_dev *dev, u8 swport);
 };
 
 /* Low-level architecture-dependent routines */
@@ -235,6 +255,7 @@ struct rio_switch {
  * @cread: Callback to perform network read of config space.
  * @cwrite: Callback to perform network write of config space.
  * @dsend: Callback to send a doorbell message.
+ * @pwenable: Callback to enable/disable port-write message handling.
  */
 struct rio_ops {
        int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len,
@@ -246,6 +267,7 @@ struct rio_ops {
        int (*cwrite) (struct rio_mport *mport, int index, u16 destid,
                        u8 hopcount, u32 offset, int len, u32 data);
        int (*dsend) (struct rio_mport *mport, int index, u16 destid, u16 data);
+       int (*pwenable) (struct rio_mport *mport, int enable);
 };
 
 #define RIO_RESOURCE_MEM       0x00000100
@@ -302,21 +324,28 @@ struct rio_device_id {
 };
 
 /**
- * struct rio_route_ops - Per-switch route operations
+ * struct rio_switch_ops - Per-switch operations
  * @vid: RIO vendor ID
  * @did: RIO device ID
- * @add_hook: Callback that adds a route entry
- * @get_hook: Callback that gets a route entry
+ * @init_hook: Callback that performs switch device initialization
  *
- * Defines the operations that are necessary to manipulate the route
- * tables for a particular RIO switch device.
+ * Defines the operations that are necessary to initialize/control
+ * a particular RIO switch device.
  */
-struct rio_route_ops {
+struct rio_switch_ops {
        u16 vid, did;
-       int (*add_hook) (struct rio_mport * mport, u16 destid, u8 hopcount,
-                        u16 table, u16 route_destid, u8 route_port);
-       int (*get_hook) (struct rio_mport * mport, u16 destid, u8 hopcount,
-                        u16 table, u16 route_destid, u8 * route_port);
+       int (*init_hook) (struct rio_dev *rdev, int do_enum);
+};
+
+union rio_pw_msg {
+       struct {
+               u32 comptag;    /* Component Tag CSR */
+               u32 errdetect;  /* Port N Error Detect CSR */
+               u32 is_port;    /* Implementation specific + PortID */
+               u32 ltlerrdet;  /* LTL Error Detect CSR */
+               u32 padding[12];
+       } em;
+       u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)];
 };
 
 /* Architecture and hardware-specific functions */
index c93a58a..edc55da 100644 (file)
@@ -413,6 +413,12 @@ void rio_release_regions(struct rio_dev *);
 int rio_request_region(struct rio_dev *, int, char *);
 void rio_release_region(struct rio_dev *, int);
 
+/* Port-Write management */
+extern int rio_request_inb_pwrite(struct rio_dev *,
+                       int (*)(struct rio_dev *, union rio_pw_msg*, int));
+extern int rio_release_inb_pwrite(struct rio_dev *);
+extern int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg);
+
 /* LDM support */
 int rio_register_driver(struct rio_driver *);
 void rio_unregister_driver(struct rio_driver *);
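
A sketch of registering the new inbound port-write callback; the handler body is illustrative only:

static int my_pw_handler(struct rio_dev *rdev, union rio_pw_msg *msg,
                         int step)
{
        u32 err_detect = msg->em.errdetect;

        /* inspect err_detect / msg->em.is_port and recover the port */
        return 0;
}

static int my_rio_probe(struct rio_dev *rdev)
{
        return rio_request_inb_pwrite(rdev, my_pw_handler);
}
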
index 919d4e0..db50e1c 100644 (file)
 
 #define RIO_VID_TUNDRA                 0x000d
 #define RIO_DID_TSI500                 0x0500
+#define RIO_DID_TSI568                 0x0568
+#define RIO_DID_TSI572                 0x0572
+#define RIO_DID_TSI574                 0x0574
+#define RIO_DID_TSI576                 0x0578 /* Same ID as Tsi578 */
+#define RIO_DID_TSI577                 0x0577
+#define RIO_DID_TSI578                 0x0578
+
+#define RIO_VID_IDT                    0x0038
+#define RIO_DID_IDT70K200              0x0310
+#define RIO_DID_IDTCPS8                        0x035c
+#define RIO_DID_IDTCPS12               0x035d
+#define RIO_DID_IDTCPS16               0x035b
+#define RIO_DID_IDTCPS6Q               0x035f
+#define RIO_DID_IDTCPS10Q              0x035e
 
 #endif                         /* LINUX_RIO_IDS_H */
index 326540f..aedee04 100644 (file)
@@ -39,6 +39,8 @@
 #define  RIO_PEF_INB_MBOX2             0x00200000      /* [II] Mailbox 2 */
 #define  RIO_PEF_INB_MBOX3             0x00100000      /* [II] Mailbox 3 */
 #define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II] Doorbells */
+#define  RIO_PEF_EXT_RT                        0x00000200      /* [III, 1.3] Extended route table support */
+#define  RIO_PEF_STD_RT                        0x00000100      /* [III, 1.3] Standard route table support */
 #define  RIO_PEF_CTLS                  0x00000010      /* [III] CTLS */
 #define  RIO_PEF_EXT_FEATURES          0x00000008      /* [I] EFT_PTR valid */
 #define  RIO_PEF_ADDR_66               0x00000004      /* [I] 66 bits */
 #define  RIO_OPS_ATOMIC_CLR            0x00000010      /* [I] Atomic clr op */
 #define  RIO_OPS_PORT_WRITE            0x00000004      /* [I] Port-write op */
 
-                                       /* 0x20-0x3c *//* Reserved */
+                                       /* 0x20-0x30 *//* Reserved */
+
+#define        RIO_SWITCH_RT_LIMIT     0x34    /* [III, 1.3] Switch Route Table Destination ID Limit CAR */
+#define         RIO_RT_MAX_DESTID              0x0000ffff
 
 #define RIO_MBOX_CSR           0x40    /* [II] Mailbox CSR */
 #define  RIO_MBOX0_AVAIL               0x80000000      /* [II] Mbox 0 avail */
 #define RIO_HOST_DID_LOCK_CSR  0x68    /* [III] Host Base Device ID Lock CSR */
 #define RIO_COMPONENT_TAG_CSR  0x6c    /* [III] Component Tag CSR */
 
-                                       /* 0x70-0xf8 *//* Reserved */
+#define RIO_STD_RTE_CONF_DESTID_SEL_CSR        0x70
+#define RIO_STD_RTE_CONF_PORT_SEL_CSR  0x74
+#define RIO_STD_RTE_DEFAULT_PORT       0x78
+
+                                       /* 0x7c-0xf8 *//* Reserved */
                                        /* 0x100-0xfff8 *//* [I] Extended Features Space */
                                        /* 0x10000-0xfffff8 *//* [I] Implementation-defined Space */
 
 #define RIO_EFB_PAR_EP_ID      0x0001  /* [IV] LP/LVDS EP Devices */
 #define RIO_EFB_PAR_EP_REC_ID  0x0002  /* [IV] LP/LVDS EP Recovery Devices */
 #define RIO_EFB_PAR_EP_FREE_ID 0x0003  /* [IV] LP/LVDS EP Free Devices */
+#define RIO_EFB_SER_EP_ID_V13P 0x0001  /* [VI] LP/Serial EP Devices, RapidIO Spec ver 1.3 and above */
+#define RIO_EFB_SER_EP_REC_ID_V13P     0x0002  /* [VI] LP/Serial EP Recovery Devices, RapidIO Spec ver 1.3 and above */
+#define RIO_EFB_SER_EP_FREE_ID_V13P    0x0003  /* [VI] LP/Serial EP Free Devices, RapidIO Spec ver 1.3 and above */
 #define RIO_EFB_SER_EP_ID      0x0004  /* [VI] LP/Serial EP Devices */
 #define RIO_EFB_SER_EP_REC_ID  0x0005  /* [VI] LP/Serial EP Recovery Devices */
 #define RIO_EFB_SER_EP_FREE_ID 0x0006  /* [VI] LP/Serial EP Free Devices */
+#define RIO_EFB_SER_EP_FREC_ID 0x0009  /* [VI] LP/Serial EP Free Recovery Devices */
+#define RIO_EFB_ERR_MGMNT      0x0007  /* [VIII] Error Management Extensions */
 
 /*
  * Physical 8/16 LP-LVDS
 #define RIO_PORT_MNT_HEADER            0x0000
 #define RIO_PORT_REQ_CTL_CSR           0x0020
 #define RIO_PORT_RSP_CTL_CSR           0x0024  /* 0x0001/0x0002 */
+#define RIO_PORT_LINKTO_CTL_CSR                0x0020  /* Serial */
+#define RIO_PORT_RSPTO_CTL_CSR         0x0024  /* Serial */
 #define RIO_PORT_GEN_CTL_CSR           0x003c
 #define  RIO_PORT_GEN_HOST             0x80000000
 #define  RIO_PORT_GEN_MASTER           0x40000000
 #define  RIO_PORT_GEN_DISCOVERED       0x20000000
 #define RIO_PORT_N_MNT_REQ_CSR(x)      (0x0040 + x*0x20)       /* 0x0002 */
 #define RIO_PORT_N_MNT_RSP_CSR(x)      (0x0044 + x*0x20)       /* 0x0002 */
+#define  RIO_PORT_N_MNT_RSP_RVAL       0x80000000 /* Response Valid */
+#define  RIO_PORT_N_MNT_RSP_ASTAT      0x000003e0 /* ackID Status */
+#define  RIO_PORT_N_MNT_RSP_LSTAT      0x0000001f /* Link Status */
 #define RIO_PORT_N_ACK_STS_CSR(x)      (0x0048 + x*0x20)       /* 0x0002 */
-#define RIO_PORT_N_ERR_STS_CSR(x)      (0x58 + x*0x20)
-#define PORT_N_ERR_STS_PORT_OK 0x00000002
-#define RIO_PORT_N_CTL_CSR(x)          (0x5c + x*0x20)
+#define  RIO_PORT_N_ACK_CLEAR          0x80000000
+#define  RIO_PORT_N_ACK_INBOUND                0x1f000000
+#define  RIO_PORT_N_ACK_OUTSTAND       0x00001f00
+#define  RIO_PORT_N_ACK_OUTBOUND       0x0000001f
+#define RIO_PORT_N_ERR_STS_CSR(x)      (0x0058 + x*0x20)
+#define  RIO_PORT_N_ERR_STS_PW_OUT_ES  0x00010000 /* Output Error-stopped */
+#define  RIO_PORT_N_ERR_STS_PW_INP_ES  0x00000100 /* Input Error-stopped */
+#define  RIO_PORT_N_ERR_STS_PW_PEND    0x00000010 /* Port-Write Pending */
+#define  RIO_PORT_N_ERR_STS_PORT_ERR   0x00000004
+#define  RIO_PORT_N_ERR_STS_PORT_OK    0x00000002
+#define  RIO_PORT_N_ERR_STS_PORT_UNINIT        0x00000001
+#define  RIO_PORT_N_ERR_STS_CLR_MASK   0x07120204
+#define RIO_PORT_N_CTL_CSR(x)          (0x005c + x*0x20)
+#define  RIO_PORT_N_CTL_PWIDTH         0xc0000000
+#define  RIO_PORT_N_CTL_PWIDTH_1       0x00000000
+#define  RIO_PORT_N_CTL_PWIDTH_4       0x40000000
+#define  RIO_PORT_N_CTL_P_TYP_SER      0x00000001
+#define  RIO_PORT_N_CTL_LOCKOUT                0x00000002
+#define  RIO_PORT_N_CTL_EN_RX_SER      0x00200000
+#define  RIO_PORT_N_CTL_EN_TX_SER      0x00400000
+#define  RIO_PORT_N_CTL_EN_RX_PAR      0x08000000
+#define  RIO_PORT_N_CTL_EN_TX_PAR      0x40000000
+
+/*
+ * Error Management Extensions (RapidIO 1.3+, Part 8)
+ *
+ * Extended Features Block ID=0x0007
+ */
+
+/* General EM Registers (Common for all Ports) */
+
+#define RIO_EM_EFB_HEADER      0x000   /* Error Management Extensions Block Header */
+#define RIO_EM_LTL_ERR_DETECT  0x008   /* Logical/Transport Layer Error Detect CSR */
+#define RIO_EM_LTL_ERR_EN      0x00c   /* Logical/Transport Layer Error Enable CSR */
+#define RIO_EM_LTL_HIADDR_CAP  0x010   /* Logical/Transport Layer High Address Capture CSR */
+#define RIO_EM_LTL_ADDR_CAP    0x014   /* Logical/Transport Layer Address Capture CSR */
+#define RIO_EM_LTL_DEVID_CAP   0x018   /* Logical/Transport Layer Device ID Capture CSR */
+#define RIO_EM_LTL_CTRL_CAP    0x01c   /* Logical/Transport Layer Control Capture CSR */
+#define RIO_EM_PW_TGT_DEVID    0x028   /* Port-write Target deviceID CSR */
+#define RIO_EM_PKT_TTL         0x02c   /* Packet Time-to-live CSR */
+
+/* Per-Port EM Registers */
+
+#define RIO_EM_PN_ERR_DETECT(x)        (0x040 + x*0x40) /* Port N Error Detect CSR */
+#define  REM_PED_IMPL_SPEC             0x80000000
+#define  REM_PED_LINK_TO               0x00000001
+#define RIO_EM_PN_ERRRATE_EN(x) (0x044 + x*0x40) /* Port N Error Rate Enable CSR */
+#define RIO_EM_PN_ATTRIB_CAP(x)        (0x048 + x*0x40) /* Port N Attributes Capture CSR */
+#define RIO_EM_PN_PKT_CAP_0(x) (0x04c + x*0x40) /* Port N Packet/Control Symbol Capture 0 CSR */
+#define RIO_EM_PN_PKT_CAP_1(x) (0x050 + x*0x40) /* Port N Packet Capture 1 CSR */
+#define RIO_EM_PN_PKT_CAP_2(x) (0x054 + x*0x40) /* Port N Packet Capture 2 CSR */
+#define RIO_EM_PN_PKT_CAP_3(x) (0x058 + x*0x40) /* Port N Packet Capture 3 CSR */
+#define RIO_EM_PN_ERRRATE(x)   (0x068 + x*0x40) /* Port N Error Rate CSR */
+#define RIO_EM_PN_ERRRATE_TR(x) (0x06c + x*0x40) /* Port N Error Rate Threshold CSR */
 
 #endif                         /* LINUX_RIO_REGS_H */
index c0151ff..f118809 100644 (file)
@@ -268,7 +268,6 @@ extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
 extern int runqueue_is_locked(int cpu);
-extern void task_rq_unlock_wait(struct task_struct *p);
 
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
@@ -527,8 +526,9 @@ struct thread_group_cputimer {
  * the locking of signal_struct.
  */
 struct signal_struct {
-       atomic_t                count;
+       atomic_t                sigcnt;
        atomic_t                live;
+       int                     nr_threads;
 
        wait_queue_head_t       wait_chldexit;  /* for wait4() */
 
@@ -1423,6 +1423,7 @@ struct task_struct {
        nodemask_t mems_allowed;        /* Protected by alloc_lock */
        int mems_allowed_change_disable;
        int cpuset_mem_spread_rotor;
+       int cpuset_slab_spread_rotor;
 #endif
 #ifdef CONFIG_CGROUPS
        /* Control Group info protected by css_set_lock */
@@ -2035,7 +2036,7 @@ extern int do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
-extern void zap_other_threads(struct task_struct *p);
+extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
@@ -2100,7 +2101,6 @@ extern void flush_thread(void);
 extern void exit_thread(void);
 
 extern void exit_files(struct task_struct *);
-extern void __cleanup_signal(struct signal_struct *);
 extern void __cleanup_sighand(struct sighand_struct *);
 
 extern void exit_itimers(struct signal_struct *);
@@ -2147,6 +2147,11 @@ extern bool current_is_single_threaded(void);
 #define while_each_thread(g, t) \
        while ((t = next_thread(t)) != g)
 
+static inline int get_nr_threads(struct task_struct *tsk)
+{
+       return tsk->signal->nr_threads;
+}
+
 /* de_thread depends on thread_group_leader not being a pid based check */
 #define thread_group_leader(p) (p == p->group_leader)
 
@@ -2393,10 +2398,6 @@ static inline void thread_group_cputime_init(struct signal_struct *sig)
        spin_lock_init(&sig->cputimer.lock);
 }
 
-static inline void thread_group_cputime_free(struct signal_struct *sig)
-{
-}
-
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
diff --git a/include/linux/sdhci-pltfm.h b/include/linux/sdhci-pltfm.h
new file mode 100644 (file)
index 0000000..0239bd7
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Platform data declarations for the sdhci-pltfm driver.
+ *
+ * Copyright (c) 2010 MontaVista Software, LLC.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#ifndef _SDHCI_PLTFM_H
+#define _SDHCI_PLTFM_H
+
+struct sdhci_ops;
+struct sdhci_host;
+
+/**
+ * struct sdhci_pltfm_data - SDHCI platform-specific information & hooks
+ * @ops: optional pointer to the platform-provided SDHCI ops
+ * @quirks: optional SDHCI quirks
+ * @init: optional hook that is called during device probe, before the
+ *        driver tries to access any SDHCI registers
+ * @exit: optional hook that is called during device removal
+ */
+struct sdhci_pltfm_data {
+       struct sdhci_ops *ops;
+       unsigned int quirks;
+       int (*init)(struct sdhci_host *host);
+       void (*exit)(struct sdhci_host *host);
+};
+
+#endif /* _SDHCI_PLTFM_H */
index 8a4adbe..f2961af 100644 (file)
@@ -79,6 +79,7 @@ struct  seminfo {
 #ifdef __KERNEL__
 #include <asm/atomic.h>
 #include <linux/rcupdate.h>
+#include <linux/cache.h>
 
 struct task_struct;
 
@@ -91,7 +92,8 @@ struct sem {
 
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
-       struct kern_ipc_perm    sem_perm;       /* permissions .. see ipc.h */
+       struct kern_ipc_perm    ____cacheline_aligned_in_smp
+                               sem_perm;       /* permissions .. see ipc.h */
        time_t                  sem_otime;      /* last semop time */
        time_t                  sem_ctime;      /* last change time */
        struct sem              *sem_base;      /* ptr to first semaphore in array */
index 9a6f760..0299b4c 100644 (file)
@@ -73,6 +73,8 @@
 #define SFI_SIG_SPIB           "SPIB"
 #define SFI_SIG_I2CB           "I2CB"
 #define SFI_SIG_GPEM           "GPEM"
+#define SFI_SIG_DEVS           "DEVS"
+#define SFI_SIG_GPIO           "GPIO"
 
 #define SFI_SIGNATURE_SIZE     4
 #define SFI_OEM_ID_SIZE                6
@@ -145,6 +147,27 @@ struct sfi_rtc_table_entry {
        u32     irq;
 } __packed;
 
+struct sfi_device_table_entry {
+       u8      type;           /* bus type, I2C, SPI or ...*/
+#define SFI_DEV_TYPE_SPI       0
+#define SFI_DEV_TYPE_I2C       1
+#define SFI_DEV_TYPE_UART      2
+#define SFI_DEV_TYPE_HSI       3
+#define SFI_DEV_TYPE_IPC       4
+
+       u8      host_num;       /* attached to host 0, 1...*/
+       u16     addr;
+       u8      irq;
+       u32     max_freq;
+       char    name[16];
+} __packed;
+
+struct sfi_gpio_table_entry {
+       char    controller_name[16];
+       u16     pin_no;
+       char    pin_name[16];
+} __packed;
+
 struct sfi_spi_table_entry {
        u16     host_num;       /* attached to host 0, 1...*/
        u16     cs;             /* chip select */
@@ -166,7 +189,6 @@ struct sfi_gpe_table_entry {
        u16     phys_id;        /* physical GPE id */
 } __packed;
 
-
 typedef int (*sfi_table_handler) (struct sfi_table_header *table);
 
 #ifdef CONFIG_SFI
index b6b6143..ff4acea 100644 (file)
@@ -282,6 +282,11 @@ extern void kswapd_stop(int nid);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 #endif /* CONFIG_MMU */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+                                       struct page **pagep, swp_entry_t *ent);
+#endif
+
 extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 
 #ifdef CONFIG_SWAP
index febedcf..81a4e21 100644 (file)
@@ -73,16 +73,6 @@ extern void
 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, enum dma_data_direction dir);
 
-extern void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-                                 unsigned long offset, size_t size,
-                                 enum dma_data_direction dir);
-
-extern void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-                                    unsigned long offset, size_t size,
-                                    enum dma_data_direction dir);
-
 extern int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
 
index 052b12b..383ab95 100644 (file)
 #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
        (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
 
+/*
+ * Define a minimum number of pids per cpu.  Heuristically based
+ * on original pid max of 32k for 32 cpus.  Also, increase the
+ * minimum settable value for pid_max on the running system based
+ * on similar defaults.  See kernel/pid.c:pidmap_init() for details.
+ */
+#define PIDS_PER_CPU_DEFAULT   1024
+#define PIDS_PER_CPU_MIN       8
+
 #endif
index 5b81156..c44df50 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/mmzone.h>
 #include <linux/smp.h>
+#include <linux/percpu.h>
 #include <asm/topology.h>
 
 #ifndef node_has_online_mem
@@ -203,8 +204,114 @@ int arch_update_cpu_topology(void);
 #ifndef SD_NODE_INIT
 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
 #endif
+
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
+DECLARE_PER_CPU(int, numa_node);
+
+#ifndef numa_node_id
+/* Returns the number of the current Node. */
+static inline int numa_node_id(void)
+{
+       return __this_cpu_read(numa_node);
+}
+#endif
+
+#ifndef cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+       return per_cpu(numa_node, cpu);
+}
+#endif
+
+#ifndef set_numa_node
+static inline void set_numa_node(int node)
+{
+       percpu_write(numa_node, node);
+}
+#endif
+
+#ifndef set_cpu_numa_node
+static inline void set_cpu_numa_node(int cpu, int node)
+{
+       per_cpu(numa_node, cpu) = node;
+}
+#endif
+
+#else  /* !CONFIG_USE_PERCPU_NUMA_NODE_ID */
+
+/* Returns the number of the current Node. */
+#ifndef numa_node_id
+static inline int numa_node_id(void)
+{
+       return cpu_to_node(raw_smp_processor_id());
+}
+#endif
+
+#endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */
+
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+
+/*
+ * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
+ * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
+ * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
+ */
+DECLARE_PER_CPU(int, _numa_mem_);
+
+#ifndef set_numa_mem
+static inline void set_numa_mem(int node)
+{
+       percpu_write(_numa_mem_, node);
+}
+#endif
+
+#ifndef numa_mem_id
+/* Returns the number of the nearest Node with memory */
+static inline int numa_mem_id(void)
+{
+       return __this_cpu_read(_numa_mem_);
+}
+#endif
+
+#ifndef cpu_to_mem
+static inline int cpu_to_mem(int cpu)
+{
+       return per_cpu(_numa_mem_, cpu);
+}
+#endif
+
+#ifndef set_cpu_numa_mem
+static inline void set_cpu_numa_mem(int cpu, int node)
+{
+       per_cpu(_numa_mem_, cpu) = node;
+}
+#endif
+
+#else  /* !CONFIG_HAVE_MEMORYLESS_NODES */
+
+static inline void set_numa_mem(int node) {}
+
+static inline void set_cpu_numa_mem(int cpu, int node) {}
+
+#ifndef numa_mem_id
+/* Returns the number of the nearest Node with memory */
+static inline int numa_mem_id(void)
+{
+       return numa_node_id();
+}
+#endif
+
+#ifndef cpu_to_mem
+static inline int cpu_to_mem(int cpu)
+{
+       return cpu_to_node(cpu);
+}
+#endif
+
+#endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */
+
 #ifndef topology_physical_package_id
 #define topology_physical_package_id(cpu)      ((void)(cpu), -1)
 #endif
@@ -218,9 +325,4 @@ int arch_update_cpu_topology(void);
 #define topology_core_cpumask(cpu)             cpumask_of(cpu)
 #endif
 
-/* Returns the number of the current Node. */
-#ifndef numa_node_id
-#define numa_node_id()         (cpu_to_node(raw_smp_processor_id()))
-#endif
-
 #endif /* _LINUX_TOPOLOGY_H */
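
Allocations that must land on a node with memory can now use numa_mem_id()/cpu_to_mem() rather than numa_node_id(), which may name a memoryless node. A minimal sketch:

static void *alloc_near_buffer(size_t size)
{
        /* nearest node that actually has memory; identical to
         * numa_node_id() unless CONFIG_HAVE_MEMORYLESS_NODES is set */
        return kmalloc_node(size, GFP_KERNEL, numa_mem_id());
}
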
index 15ddd44..60c81da 100644 (file)
@@ -166,11 +166,11 @@ struct uinput_ff_erase {
 struct uinput_user_dev {
        char name[UINPUT_MAX_NAME_SIZE];
        struct input_id id;
-        int ff_effects_max;
-        int absmax[ABS_MAX + 1];
-        int absmin[ABS_MAX + 1];
-        int absfuzz[ABS_MAX + 1];
-        int absflat[ABS_MAX + 1];
+       int ff_effects_max;
+       int absmax[ABS_CNT];
+       int absmin[ABS_CNT];
+       int absfuzz[ABS_CNT];
+       int absflat[ABS_CNT];
 };
 #endif /* __UINPUT_H_ */
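
ABS_CNT is defined in linux/input.h as (ABS_MAX + 1), so the arrays keep
their size; the new spelling states the intent (a count, not a maximum index)
and the replacement lines also fix the spaces-instead-of-tab indentation. A
small userspace sketch, assuming <linux/uinput.h> and one absolute axis,
error handling omitted:

	struct uinput_user_dev dev;
	int i;

	memset(&dev, 0, sizeof(dev));
	for (i = 0; i < ABS_CNT; i++)	/* ABS_CNT == ABS_MAX + 1 */
		dev.absfuzz[i] = 0;
	dev.absmin[ABS_X] = 0;
	dev.absmax[ABS_X] = 1023;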
 
index 2aa6aa3..5d60ad4 100644
@@ -353,7 +353,7 @@ TRACE_EVENT(ext4_discard_blocks,
                  jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
 );
 
-TRACE_EVENT(ext4_mb_new_inode_pa,
+DECLARE_EVENT_CLASS(ext4__mb_new_pa,
        TP_PROTO(struct ext4_allocation_context *ac,
                 struct ext4_prealloc_space *pa),
 
@@ -381,32 +381,20 @@ TRACE_EVENT(ext4_mb_new_inode_pa,
                  __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
 );
 
-TRACE_EVENT(ext4_mb_new_group_pa,
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
+
        TP_PROTO(struct ext4_allocation_context *ac,
                 struct ext4_prealloc_space *pa),
 
-       TP_ARGS(ac, pa),
-
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        __u64,  pa_pstart               )
-               __field(        __u32,  pa_len                  )
-               __field(        __u64,  pa_lstart               )
+       TP_ARGS(ac, pa)
+);
 
-       ),
+DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
 
-       TP_fast_assign(
-               __entry->dev            = ac->ac_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
-               __entry->pa_pstart      = pa->pa_pstart;
-               __entry->pa_len         = pa->pa_len;
-               __entry->pa_lstart      = pa->pa_lstart;
-       ),
+       TP_PROTO(struct ext4_allocation_context *ac,
+                struct ext4_prealloc_space *pa),
 
-       TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
+       TP_ARGS(ac, pa)
 );
 
 TRACE_EVENT(ext4_mb_release_inode_pa,
@@ -790,7 +778,7 @@ TRACE_EVENT(ext4_mballoc_prealloc,
                  __entry->result_len, __entry->result_logical)
 );
 
-TRACE_EVENT(ext4_mballoc_discard,
+DECLARE_EVENT_CLASS(ext4__mballoc,
        TP_PROTO(struct ext4_allocation_context *ac),
 
        TP_ARGS(ac),
@@ -819,33 +807,18 @@ TRACE_EVENT(ext4_mballoc_discard,
                  __entry->result_len, __entry->result_logical)
 );
 
-TRACE_EVENT(ext4_mballoc_free,
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,
+
        TP_PROTO(struct ext4_allocation_context *ac),
 
-       TP_ARGS(ac),
+       TP_ARGS(ac)
+);
 
-       TP_STRUCT__entry(
-               __field(        dev_t,  dev                     )
-               __field(        ino_t,  ino                     )
-               __field(        __u32,  result_logical          )
-               __field(          int,  result_start            )
-               __field(        __u32,  result_group            )
-               __field(          int,  result_len              )
-       ),
+DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,
 
-       TP_fast_assign(
-               __entry->dev            = ac->ac_inode->i_sb->s_dev;
-               __entry->ino            = ac->ac_inode->i_ino;
-               __entry->result_logical = ac->ac_b_ex.fe_logical;
-               __entry->result_start   = ac->ac_b_ex.fe_start;
-               __entry->result_group   = ac->ac_b_ex.fe_group;
-               __entry->result_len     = ac->ac_b_ex.fe_len;
-       ),
+       TP_PROTO(struct ext4_allocation_context *ac),
 
-       TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
-                 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
-                 __entry->result_group, __entry->result_start,
-                 __entry->result_len, __entry->result_logical)
+       TP_ARGS(ac)
 );
 
 TRACE_EVENT(ext4_forget,
@@ -974,6 +947,39 @@ TRACE_EVENT(ext4_da_release_space,
                  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
 
+DECLARE_EVENT_CLASS(ext4__bitmap_load,
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        __u32,  group                   )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->group  = group;
+       ),
+
+       TP_printk("dev %s group %u",
+                 jbd2_dev_to_name(__entry->dev), __entry->group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
+
+       TP_PROTO(struct super_block *sb, unsigned long group),
+
+       TP_ARGS(sb, group)
+);
 
 #endif /* _TRACE_EXT4_H */
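
All of the conversions above follow the same shape: DECLARE_EVENT_CLASS
carries the TP_STRUCT__entry/TP_fast_assign/TP_printk triple once, and each
DEFINE_EVENT restates only its prototype, shrinking the generated code.
Schematically, with placeholder names rather than real ext4 tracepoints:

	DECLARE_EVENT_CLASS(subsys__template,
		TP_PROTO(int arg),
		TP_ARGS(arg),
		TP_STRUCT__entry(__field(int, arg)),
		TP_fast_assign(__entry->arg = arg;),
		TP_printk("arg %d", __entry->arg)
	);

	DEFINE_EVENT(subsys__template, subsys_event_a,
		TP_PROTO(int arg),
		TP_ARGS(arg)
	);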
 
index dbef95b..506c849 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -3,56 +3,6 @@
  * Copyright (C) 1992 Krishna Balasubramanian
  * Copyright (C) 1995 Eric Schenk, Bruno Haible
  *
- * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
- * This code underwent a massive rewrite in order to solve some problems
- * with the original code. In particular the original code failed to
- * wake up processes that were waiting for semval to go to 0 if the
- * value went to 0 and was then incremented rapidly enough. In solving
- * this problem I have also modified the implementation so that it
- * processes pending operations in a FIFO manner, thus give a guarantee
- * that processes waiting for a lock on the semaphore won't starve
- * unless another locking process fails to unlock.
- * In addition the following two changes in behavior have been introduced:
- * - The original implementation of semop returned the value
- *   last semaphore element examined on success. This does not
- *   match the manual page specifications, and effectively
- *   allows the user to read the semaphore even if they do not
- *   have read permissions. The implementation now returns 0
- *   on success as stated in the manual page.
- * - There is some confusion over whether the set of undo adjustments
- *   to be performed at exit should be done in an atomic manner.
- *   That is, if we are attempting to decrement the semval should we queue
- *   up and wait until we can do so legally?
- *   The original implementation attempted to do this.
- *   The current implementation does not do so. This is because I don't
- *   think it is the right thing (TM) to do, and because I couldn't
- *   see a clean way to get the old behavior with the new design.
- *   The POSIX standard and SVID should be consulted to determine
- *   what behavior is mandated.
- *
- * Further notes on refinement (Christoph Rohland, December 1998):
- * - The POSIX standard says, that the undo adjustments simply should
- *   redo. So the current implementation is o.K.
- * - The previous code had two flaws:
- *   1) It actively gave the semaphore to the next waiting process
- *      sleeping on the semaphore. Since this process did not have the
- *      cpu this led to many unnecessary context switches and bad
- *      performance. Now we only check which process should be able to
- *      get the semaphore and if this process wants to reduce some
- *      semaphore value we simply wake it up without doing the
- *      operation. So it has to try to get it later. Thus e.g. the
- *      running process may reacquire the semaphore during the current
- *      time slice. If it only waits for zero or increases the semaphore,
- *      we do the operation in advance and wake it up.
- *   2) It did not wake up all zero waiting processes. We try to do
- *      better but only get the semops right which only wait for zero or
- *      increase. If there are decrement operations in the operations
- *      array we do the same as before.
- *
- * With the incarnation of O(1) scheduler, it becomes unnecessary to perform
- * check/retry algorithm for waking up blocked processes as the new scheduler
- * is better at handling thread switch than the old one.
- *
  * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  *
  * SMP-threaded, sysctl's added
@@ -61,6 +11,8 @@
  * (c) 2001 Red Hat Inc
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
+ * Further wakeup optimizations, documentation
+ * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Implementation notes: (May 2010)
+ * This file implements System V semaphores.
+ *
+ * User space visible behavior:
+ * - FIFO ordering for semop() operations (just FIFO, not starvation
+ *   protection)
+ * - multiple semaphore operations that alter the same semaphore in
+ *   one semop() are handled.
+ * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
+ *   SETALL calls.
+ * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
+ * - undo adjustments at process exit are limited to 0..SEMVMX.
+ * - namespaces are supported.
+ * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
+ *   to /proc/sys/kernel/sem.
+ * - statistics about the usage are reported in /proc/sysvipc/sem.
+ *
+ * Internals:
+ * - scalability:
+ *   - all global variables are read-mostly.
+ *   - semop() calls and semctl(RMID) are synchronized by RCU.
+ *   - most operations do write operations (actually: spin_lock calls) to
+ *     the per-semaphore array structure.
+ *   Thus: Perfect SMP scaling between independent semaphore arrays.
+ *         If multiple semaphores in one array are used, then cache line
+ *         thrashing on the semaphore array spinlock will limit the scaling.
+ * - semncnt and semzcnt are calculated on demand in count_semncnt() and
+ *   count_semzcnt()
+ * - the task that performs a successful semop() scans the list of all
+ *   sleeping tasks and completes any pending operations that can be fulfilled.
+ *   Semaphores are actively given to waiting tasks (necessary for FIFO).
+ *   (see update_queue())
+ * - To improve the scalability, the actual wake-up calls are performed after
+ *   dropping all locks. (see wake_up_sem_queue_prepare(),
+ *   wake_up_sem_queue_do())
+ * - All work is done by the waker, the woken up task does not have to do
+ *   anything - not even acquiring a lock or dropping a refcount.
+ * - A woken-up task may not even touch the semaphore array anymore; it may
+ *   have been destroyed already by a semctl(RMID).
+ * - The synchronization between wake-ups due to a timeout/signal and a
+ *   wake-up due to a completed semaphore operation is achieved by using an
+ *   intermediate state (IN_WAKEUP).
+ * - UNDO values are stored in an array (one per process and per
+ *   semaphore array, lazily allocated). For backwards compatibility, multiple
+ *   modes for the UNDO variables are supported (per process, per thread)
+ *   (see copy_semundo, CLONE_SYSVSEM)
+ * - There are two lists of the pending operations: a per-array list
+ *   and a per-semaphore list (stored in the array). This allows achieving FIFO
+ *   ordering without always scanning all pending operations.
+ *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
  */
 
 #include <linux/slab.h>
@@ -381,7 +384,6 @@ static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
                sop--;
        }
        
-       sma->sem_otime = get_seconds();
        return 0;
 
 out_of_range:
@@ -404,25 +406,51 @@ undo:
        return result;
 }
 
-/*
- * Wake up a process waiting on the sem queue with a given error.
- * The queue is invalid (may not be accessed) after the function returns.
+/** wake_up_sem_queue_prepare(q, error): Prepare wake-up
+ * @q: queue entry that must be signaled
+ * @error: Error value for the signal
+ *
+ * Prepare the wake-up of the queue entry q.
  */
-static void wake_up_sem_queue(struct sem_queue *q, int error)
+static void wake_up_sem_queue_prepare(struct list_head *pt,
+                               struct sem_queue *q, int error)
 {
-       /*
-        * Hold preempt off so that we don't get preempted and have the
-        * wakee busy-wait until we're scheduled back on. We're holding
-        * locks here so it may not strictly be needed, however if the
-        * locks become preemptible then this prevents such a problem.
-        */
-       preempt_disable();
+       if (list_empty(pt)) {
+               /*
+                * Hold preempt off so that we don't get preempted and have the
+                * wakee busy-wait until we're scheduled back on.
+                */
+               preempt_disable();
+       }
        q->status = IN_WAKEUP;
-       wake_up_process(q->sleeper);
-       /* hands-off: q can disappear immediately after writing q->status. */
-       smp_wmb();
-       q->status = error;
-       preempt_enable();
+       q->pid = error;
+
+       list_add_tail(&q->simple_list, pt);
+}
+
+/**
+ * wake_up_sem_queue_do(pt) - do the actual wake-up
+ * @pt: list of tasks to be woken up
+ *
+ * Do the actual wake-up.
+ * The function is called without any locks held, thus the semaphore array
+ * could be destroyed already and the tasks can disappear as soon as the
+ * status is set to the actual return code.
+ */
+static void wake_up_sem_queue_do(struct list_head *pt)
+{
+       struct sem_queue *q, *t;
+       int did_something;
+
+       did_something = !list_empty(pt);
+       list_for_each_entry_safe(q, t, pt, simple_list) {
+               wake_up_process(q->sleeper);
+               /* q can disappear immediately after writing q->status. */
+               smp_wmb();
+               q->status = q->pid;
+       }
+       if (did_something)
+               preempt_enable();
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -434,22 +462,90 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
                sma->complex_count--;
 }
 
+/** check_restart(sma, q)
+ * @sma: semaphore array
+ * @q: the operation that just completed
+ *
+ * update_queue is O(N^2) when it restarts scanning the whole queue of
+ * waiting operations. Therefore this function checks if the restart is
+ * really necessary. It is called after a previously waiting operation
+ * was completed.
+ */
+static int check_restart(struct sem_array *sma, struct sem_queue *q)
+{
+       struct sem *curr;
+       struct sem_queue *h;
+
+       /* if the operation didn't modify the array, then no restart */
+       if (q->alter == 0)
+               return 0;
+
+       /* pending complex operations are too difficult to analyse */
+       if (sma->complex_count)
+               return 1;
+
+       /* we were a sleeping complex operation. Too difficult */
+       if (q->nsops > 1)
+               return 1;
+
+       curr = sma->sem_base + q->sops[0].sem_num;
+
+       /* No-one waits on this queue */
+       if (list_empty(&curr->sem_pending))
+               return 0;
+
+       /* the new semaphore value */
+       if (curr->semval) {
+               /* It is impossible that someone waits for the new value:
+                * - q is a previously sleeping simple operation that
+                *   altered the array. It must be a decrement, because
+                *   simple increments never sleep.
+                * - The value is not 0, thus wait-for-zero won't proceed.
+                * - If there are older (higher priority) decrements
+                *   in the queue, then they have observed the original
+                *   semval value and couldn't proceed. The operation
+                *   decremented the value, thus they won't proceed either.
+                */
+               BUG_ON(q->sops[0].sem_op >= 0);
+               return 0;
+       }
+       /*
+        * semval is 0. Check if there are wait-for-zero semops.
+        * They must be the first entries in the per-semaphore simple queue
+        */
+       h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
+       BUG_ON(h->nsops != 1);
+       BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
+
+       /* Yes, there is a wait-for-zero semop. Restart */
+       if (h->sops[0].sem_op == 0)
+               return 1;
+
+       /* Again - no-one is waiting for the new value. */
+       return 0;
+}
+
 
 /**
  * update_queue(sma, semnum): Look for tasks that can be completed.
  * @sma: semaphore array.
  * @semnum: semaphore that was modified.
+ * @pt: list head for the tasks that must be woken up.
  *
  * update_queue must be called after a semaphore in a semaphore array
  * was modified. If multiple semaphore were modified, then @semnum
  * must be set to -1.
+ * The tasks that must be woken up are added to @pt. The return code
+ * is stored in q->pid.
+ * The function returns 1 if at least one semop was completed successfully.
  */
-static void update_queue(struct sem_array *sma, int semnum)
+static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
 {
        struct sem_queue *q;
        struct list_head *walk;
        struct list_head *pending_list;
        int offset;
+       int semop_completed = 0;
 
        /* if there are complex operations around, then knowing the semaphore
         * that was modified doesn't help us. Assume that multiple semaphores
@@ -469,7 +565,7 @@ static void update_queue(struct sem_array *sma, int semnum)
 again:
        walk = pending_list->next;
        while (walk != pending_list) {
-               int error, alter;
+               int error, restart;
 
                q = (struct sem_queue *)((char *)walk - offset);
                walk = walk->next;
@@ -494,22 +590,58 @@ again:
 
                unlink_queue(sma, q);
 
-               /*
-                * The next operation that must be checked depends on the type
-                * of the completed operation:
-                * - if the operation modified the array, then restart from the
-                *   head of the queue and check for threads that might be
-                *   waiting for the new semaphore values.
-                * - if the operation didn't modify the array, then just
-                *   continue.
-                */
-               alter = q->alter;
-               wake_up_sem_queue(q, error);
-               if (alter && !error)
+               if (error) {
+                       restart = 0;
+               } else {
+                       semop_completed = 1;
+                       restart = check_restart(sma, q);
+               }
+
+               wake_up_sem_queue_prepare(pt, q, error);
+               if (restart)
                        goto again;
        }
+       return semop_completed;
+}
+
+/**
+ * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
+ * @sma: semaphore array
+ * @sops: operations that were performed
+ * @nsops: number of operations
+ * @otime: force setting otime
+ * @pt: list head of the tasks that must be woken up.
+ *
+ * do_smart_update() does the required calls to update_queue, based on the
+ * actual changes that were performed on the semaphore array.
+ * Note that the function does not do the actual wake-up: the caller is
+ * responsible for calling wake_up_sem_queue_do(@pt).
+ * It is safe to perform this call after dropping all locks.
+ */
+static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
+                       int otime, struct list_head *pt)
+{
+       int i;
+
+       if (sma->complex_count || sops == NULL) {
+               if (update_queue(sma, -1, pt))
+                       otime = 1;
+               goto done;
+       }
+
+       for (i = 0; i < nsops; i++) {
+               if (sops[i].sem_op > 0 ||
+                       (sops[i].sem_op < 0 &&
+                               sma->sem_base[sops[i].sem_num].semval == 0))
+                       if (update_queue(sma, sops[i].sem_num, pt))
+                               otime = 1;
+       }
+done:
+       if (otime)
+               sma->sem_otime = get_seconds();
 }
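
Every converted caller below follows the same calling convention: collect the
wake-ups under the array lock, drop the lock, then wake. Condensed, with the
surrounding locking elided:

	struct list_head tasks;

	INIT_LIST_HEAD(&tasks);
	/* ... under the per-array lock: modify semaphore values ... */
	do_smart_update(sma, sops, nsops, 1, &tasks);
	sem_unlock(sma);
	wake_up_sem_queue_do(&tasks);	/* no locks held here */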
 
+
 /* The following counts are associated to each semaphore:
  *   semncnt        number of tasks waiting on semval being nonzero
  *   semzcnt        number of tasks waiting on semval being zero
@@ -572,6 +704,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
        struct sem_undo *un, *tu;
        struct sem_queue *q, *tq;
        struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
+       struct list_head tasks;
 
        /* Free the existing undo structures for this semaphore set.  */
        assert_spin_locked(&sma->sem_perm.lock);
@@ -585,15 +718,17 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
        }
 
        /* Wake up all pending processes and let them fail with EIDRM. */
+       INIT_LIST_HEAD(&tasks);
        list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
                unlink_queue(sma, q);
-               wake_up_sem_queue(q, -EIDRM);
+               wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
        }
 
        /* Remove the semaphore set from the IDR */
        sem_rmid(ns, sma);
        sem_unlock(sma);
 
+       wake_up_sem_queue_do(&tasks);
        ns->used_sems -= sma->sem_nsems;
        security_sem_free(sma);
        ipc_rcu_putref(sma);
@@ -715,11 +850,13 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
        ushort fast_sem_io[SEMMSL_FAST];
        ushort* sem_io = fast_sem_io;
        int nsems;
+       struct list_head tasks;
 
        sma = sem_lock_check(ns, semid);
        if (IS_ERR(sma))
                return PTR_ERR(sma);
 
+       INIT_LIST_HEAD(&tasks);
        nsems = sma->sem_nsems;
 
        err = -EACCES;
@@ -807,7 +944,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                }
                sma->sem_ctime = get_seconds();
                /* maybe some queued-up processes were waiting for this */
-               update_queue(sma, -1);
+               do_smart_update(sma, NULL, 0, 0, &tasks);
                err = 0;
                goto out_unlock;
        }
@@ -849,13 +986,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                curr->sempid = task_tgid_vnr(current);
                sma->sem_ctime = get_seconds();
                /* maybe some queued-up processes were waiting for this */
-               update_queue(sma, semnum);
+               do_smart_update(sma, NULL, 0, 0, &tasks);
                err = 0;
                goto out_unlock;
        }
        }
 out_unlock:
        sem_unlock(sma);
+       wake_up_sem_queue_do(&tasks);
+
 out_free:
        if(sem_io != fast_sem_io)
                ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1069,7 +1208,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        /* step 1: figure out the size of the semaphore array */
        sma = sem_lock_check(ns, semid);
        if (IS_ERR(sma))
-               return ERR_PTR(PTR_ERR(sma));
+               return ERR_CAST(sma);
 
        nsems = sma->sem_nsems;
        sem_getref_and_unlock(sma);
@@ -1129,6 +1268,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        struct sem_queue queue;
        unsigned long jiffies_left = 0;
        struct ipc_namespace *ns;
+       struct list_head tasks;
 
        ns = current->nsproxy->ipc_ns;
 
@@ -1177,6 +1317,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        } else
                un = NULL;
 
+       INIT_LIST_HEAD(&tasks);
+
        sma = sem_lock_check(ns, semid);
        if (IS_ERR(sma)) {
                if (un)
@@ -1225,7 +1367,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
        if (error <= 0) {
                if (alter && error == 0)
-                       update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1);
+                       do_smart_update(sma, sops, nsops, 1, &tasks);
 
                goto out_unlock_free;
        }
@@ -1302,6 +1444,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
 out_unlock_free:
        sem_unlock(sma);
+
+       wake_up_sem_queue_do(&tasks);
 out_free:
        if(sops != fast_sops)
                kfree(sops);
@@ -1362,6 +1506,7 @@ void exit_sem(struct task_struct *tsk)
        for (;;) {
                struct sem_array *sma;
                struct sem_undo *un;
+               struct list_head tasks;
                int semid;
                int i;
 
@@ -1425,10 +1570,11 @@ void exit_sem(struct task_struct *tsk)
                                semaphore->sempid = task_tgid_vnr(current);
                        }
                }
-               sma->sem_otime = get_seconds();
                /* maybe some queued-up processes were waiting for this */
-               update_queue(sma, -1);
+               INIT_LIST_HEAD(&tasks);
+               do_smart_update(sma, NULL, 0, 1, &tasks);
                sem_unlock(sma);
+               wake_up_sem_queue_do(&tasks);
 
                call_rcu(&un->rcu, free_un);
        }
index 2917750..422cb19 100644
@@ -2994,7 +2994,6 @@ static void cgroup_event_remove(struct work_struct *work)
                        remove);
        struct cgroup *cgrp = event->cgrp;
 
-       /* TODO: check return code */
        event->cft->unregister_event(cgrp, event->cft, event->eventfd);
 
        eventfd_ctx_put(event->eventfd);
index 124ad9d..3097382 100644
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
+/*
+ * The following two APIs must be used when attempting
+ * to serialize the updates to cpu_online_mask, cpu_present_mask.
+ */
+void cpu_maps_update_begin(void)
+{
+       mutex_lock(&cpu_add_remove_lock);
+}
+
+void cpu_maps_update_done(void)
+{
+       mutex_unlock(&cpu_add_remove_lock);
+}
+
 static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
 
 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
@@ -27,6 +41,8 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
  */
 static int cpu_hotplug_disabled;
 
+#ifdef CONFIG_HOTPLUG_CPU
+
 static struct {
        struct task_struct *active_writer;
        struct mutex lock; /* Synchronizes accesses to refcount, */
@@ -41,8 +57,6 @@ static struct {
        .refcount = 0,
 };
 
-#ifdef CONFIG_HOTPLUG_CPU
-
 void get_online_cpus(void)
 {
        might_sleep();
@@ -67,22 +81,6 @@ void put_online_cpus(void)
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
-#endif /* CONFIG_HOTPLUG_CPU */
-
-/*
- * The following two API's must be used when attempting
- * to serialize the updates to cpu_online_mask, cpu_present_mask.
- */
-void cpu_maps_update_begin(void)
-{
-       mutex_lock(&cpu_add_remove_lock);
-}
-
-void cpu_maps_update_done(void)
-{
-       mutex_unlock(&cpu_add_remove_lock);
-}
-
 /*
  * This ensures that the hotplug operation can begin only when the
  * refcount goes to zero.
@@ -124,6 +122,12 @@ static void cpu_hotplug_done(void)
        cpu_hotplug.active_writer = NULL;
        mutex_unlock(&cpu_hotplug.lock);
 }
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static void cpu_hotplug_begin(void) {}
+static void cpu_hotplug_done(void) {}
+#endif /* !CONFIG_HOTPLUG_CPU */
+
 /* Need to know about CPUs going up/down? */
 int __ref register_cpu_notifier(struct notifier_block *nb)
 {
@@ -134,8 +138,29 @@ int __ref register_cpu_notifier(struct notifier_block *nb)
        return ret;
 }
 
+static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
+                       int *nr_calls)
+{
+       int ret;
+
+       ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
+                                       nr_calls);
+
+       return notifier_to_errno(ret);
+}
+
+static int cpu_notify(unsigned long val, void *v)
+{
+       return __cpu_notify(val, v, -1, NULL);
+}
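
Because __cpu_notify() now folds the chain result through notifier_to_errno(),
callbacks are expected to encode failures with notifier_from_errno() instead
of returning a bare NOTIFY_BAD; the padata.c and profile.c hunks later in this
series are the matching call-site conversions. A minimal callback sketch, with
example_alloc() standing in for whatever per-cpu setup can fail:

	static int __cpuinit example_cpu_callback(struct notifier_block *nfb,
						  unsigned long action, void *hcpu)
	{
		switch (action) {
		case CPU_UP_PREPARE:
		case CPU_UP_PREPARE_FROZEN:
			if (!example_alloc(hcpu))	/* assumed helper */
				return notifier_from_errno(-ENOMEM);
			break;
		}
		return NOTIFY_OK;
	}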
+
 #ifdef CONFIG_HOTPLUG_CPU
 
+static void cpu_notify_nofail(unsigned long val, void *v)
+{
+       BUG_ON(cpu_notify(val, v));
+}
+
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -181,8 +206,7 @@ static int __ref take_cpu_down(void *_param)
        if (err < 0)
                return err;
 
-       raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
-                               param->hcpu);
+       cpu_notify(CPU_DYING | param->mod, param->hcpu);
 
        if (task_cpu(param->caller) == cpu)
                move_task_off_dead_cpu(cpu, param->caller);
@@ -212,17 +236,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
        cpu_hotplug_begin();
        set_cpu_active(cpu, false);
-       err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
-                                       hcpu, -1, &nr_calls);
-       if (err == NOTIFY_BAD) {
+       err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+       if (err) {
                set_cpu_active(cpu, true);
 
                nr_calls--;
-               __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
-                                         hcpu, nr_calls, NULL);
+               __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
                printk("%s: attempt to take down CPU %u failed\n",
                                __func__, cpu);
-               err = -EINVAL;
                goto out_release;
        }
 
@@ -230,9 +251,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        if (err) {
                set_cpu_active(cpu, true);
                /* CPU didn't die: tell everyone.  Can't complain. */
-               if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
-                                           hcpu) == NOTIFY_BAD)
-                       BUG();
+               cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 
                goto out_release;
        }
@@ -246,19 +265,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        __cpu_die(cpu);
 
        /* CPU is completely dead: tell everyone.  Too late to complain. */
-       if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
-                                   hcpu) == NOTIFY_BAD)
-               BUG();
+       cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 
        check_for_tasks(cpu);
 
 out_release:
        cpu_hotplug_done();
-       if (!err) {
-               if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
-                                           hcpu) == NOTIFY_BAD)
-                       BUG();
-       }
+       if (!err)
+               cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
        return err;
 }
 
@@ -293,13 +307,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
                return -EINVAL;
 
        cpu_hotplug_begin();
-       ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
-                                                       -1, &nr_calls);
-       if (ret == NOTIFY_BAD) {
+       ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
+       if (ret) {
                nr_calls--;
                printk("%s: attempt to bring up CPU %u failed\n",
                                __func__, cpu);
-               ret = -EINVAL;
                goto out_notify;
        }
 
@@ -312,12 +324,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
        set_cpu_active(cpu, true);
 
        /* Now call notifier in preparation. */
-       raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
+       cpu_notify(CPU_ONLINE | mod, hcpu);
 
 out_notify:
        if (ret != 0)
-               __raw_notifier_call_chain(&cpu_chain,
-                               CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+               __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
        cpu_hotplug_done();
 
        return ret;
@@ -481,7 +492,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
        if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
                val = CPU_STARTING_FROZEN;
 #endif /* CONFIG_PM_SLEEP_SMP */
-       raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+       cpu_notify(val, (void *)(long)cpu);
 }
 
 #endif /* CONFIG_SMP */
index 61d6af7..02b9611 100644
@@ -2469,7 +2469,8 @@ void cpuset_unlock(void)
 }
 
 /**
- * cpuset_mem_spread_node() - On which node to begin search for a page
+ * cpuset_mem_spread_node() - On which node to begin search for a file page
+ * cpuset_slab_spread_node() - On which node to begin search for a slab page
  *
  * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
  * tasks in a cpuset with is_spread_page or is_spread_slab set),
@@ -2494,16 +2495,27 @@ void cpuset_unlock(void)
  * See kmem_cache_alloc_node().
  */
 
-int cpuset_mem_spread_node(void)
+static int cpuset_spread_node(int *rotor)
 {
        int node;
 
-       node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
+       node = next_node(*rotor, current->mems_allowed);
        if (node == MAX_NUMNODES)
                node = first_node(current->mems_allowed);
-       current->cpuset_mem_spread_rotor = node;
+       *rotor = node;
        return node;
 }
+
+int cpuset_mem_spread_node(void)
+{
+       return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
+}
+
+int cpuset_slab_spread_node(void)
+{
+       return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
+}
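
Callers pick the rotor that matches the allocation type. A condensed sketch of
the page-cache pattern (not a verbatim hunk from this series):

	int node;
	struct page *page;

	if (cpuset_do_page_mem_spread())	/* task has PF_SPREAD_PAGE */
		node = cpuset_mem_spread_node();
	else
		node = numa_node_id();
	page = alloc_pages_exact_node(node, GFP_KERNEL, 0);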
+
 EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
 
 /**
index 2c24870..a2d5504 100644
@@ -347,66 +347,6 @@ struct cred *prepare_exec_creds(void)
 }
 
 /*
- * prepare new credentials for the usermode helper dispatcher
- */
-struct cred *prepare_usermodehelper_creds(void)
-{
-#ifdef CONFIG_KEYS
-       struct thread_group_cred *tgcred = NULL;
-#endif
-       struct cred *new;
-
-#ifdef CONFIG_KEYS
-       tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC);
-       if (!tgcred)
-               return NULL;
-#endif
-
-       new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
-       if (!new)
-               goto free_tgcred;
-
-       kdebug("prepare_usermodehelper_creds() alloc %p", new);
-
-       memcpy(new, &init_cred, sizeof(struct cred));
-
-       atomic_set(&new->usage, 1);
-       set_cred_subscribers(new, 0);
-       get_group_info(new->group_info);
-       get_uid(new->user);
-
-#ifdef CONFIG_KEYS
-       new->thread_keyring = NULL;
-       new->request_key_auth = NULL;
-       new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT;
-
-       atomic_set(&tgcred->usage, 1);
-       spin_lock_init(&tgcred->lock);
-       new->tgcred = tgcred;
-#endif
-
-#ifdef CONFIG_SECURITY
-       new->security = NULL;
-#endif
-       if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
-               goto error;
-       validate_creds(new);
-
-       BUG_ON(atomic_read(&new->usage) != 1);
-       return new;
-
-error:
-       put_cred(new);
-       return NULL;
-
-free_tgcred:
-#ifdef CONFIG_KEYS
-       kfree(tgcred);
-#endif
-       return NULL;
-}
-
-/*
  * Copy credentials for the new process created by fork()
  *
  * We share if we can, but under some circumstances we have to generate a new
index 019a284..ceffc67 100644
 
 static void exit_mm(struct task_struct * tsk);
 
-static void __unhash_process(struct task_struct *p)
+static void __unhash_process(struct task_struct *p, bool group_dead)
 {
        nr_threads--;
        detach_pid(p, PIDTYPE_PID);
-       if (thread_group_leader(p)) {
+       if (group_dead) {
                detach_pid(p, PIDTYPE_PGID);
                detach_pid(p, PIDTYPE_SID);
 
@@ -79,10 +79,9 @@ static void __unhash_process(struct task_struct *p)
 static void __exit_signal(struct task_struct *tsk)
 {
        struct signal_struct *sig = tsk->signal;
+       bool group_dead = thread_group_leader(tsk);
        struct sighand_struct *sighand;
-
-       BUG_ON(!sig);
-       BUG_ON(!atomic_read(&sig->count));
+       struct tty_struct *uninitialized_var(tty);
 
        sighand = rcu_dereference_check(tsk->sighand,
                                        rcu_read_lock_held() ||
@@ -90,14 +89,16 @@ static void __exit_signal(struct task_struct *tsk)
        spin_lock(&sighand->siglock);
 
        posix_cpu_timers_exit(tsk);
-       if (atomic_dec_and_test(&sig->count))
+       if (group_dead) {
                posix_cpu_timers_exit_group(tsk);
-       else {
+               tty = sig->tty;
+               sig->tty = NULL;
+       } else {
                /*
                 * If there is any task waiting for the group exit
                 * then notify it:
                 */
-               if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
+               if (sig->notify_count > 0 && !--sig->notify_count)
                        wake_up_process(sig->group_exit_task);
 
                if (tsk == sig->curr_target)
@@ -123,32 +124,24 @@ static void __exit_signal(struct task_struct *tsk)
                sig->oublock += task_io_get_oublock(tsk);
                task_io_accounting_add(&sig->ioac, &tsk->ioac);
                sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
-               sig = NULL; /* Marker for below. */
        }
 
-       __unhash_process(tsk);
+       sig->nr_threads--;
+       __unhash_process(tsk, group_dead);
 
        /*
         * Do this under ->siglock, we can race with another thread
         * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
         */
        flush_sigqueue(&tsk->pending);
-
-       tsk->signal = NULL;
        tsk->sighand = NULL;
        spin_unlock(&sighand->siglock);
 
        __cleanup_sighand(sighand);
        clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
-       if (sig) {
+       if (group_dead) {
                flush_sigqueue(&sig->shared_pending);
-               taskstats_tgid_free(sig);
-               /*
-                * Make sure ->signal can't go away under rq->lock,
-                * see account_group_exec_runtime().
-                */
-               task_rq_unlock_wait(tsk);
-               __cleanup_signal(sig);
+               tty_kref_put(tty);
        }
 }
 
@@ -856,12 +849,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 
        tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
 
-       /* mt-exec, de_thread() is waiting for us */
-       if (thread_group_leader(tsk) &&
-           tsk->signal->group_exit_task &&
-           tsk->signal->notify_count < 0)
+       /* mt-exec, de_thread() is waiting for group leader */
+       if (unlikely(tsk->signal->notify_count < 0))
                wake_up_process(tsk->signal->group_exit_task);
-
        write_unlock_irq(&tasklist_lock);
 
        tracehook_report_death(tsk, signal, cookie, group_dead);
index 4d57d9e..bf9fef6 100644
@@ -165,6 +165,18 @@ void free_task(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(free_task);
 
+static inline void free_signal_struct(struct signal_struct *sig)
+{
+       taskstats_tgid_free(sig);
+       kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void put_signal_struct(struct signal_struct *sig)
+{
+       if (atomic_dec_and_test(&sig->sigcnt))
+               free_signal_struct(sig);
+}
+
 void __put_task_struct(struct task_struct *tsk)
 {
        WARN_ON(!tsk->exit_state);
@@ -173,6 +185,7 @@ void __put_task_struct(struct task_struct *tsk)
 
        exit_creds(tsk);
        delayacct_tsk_free(tsk);
+       put_signal_struct(tsk->signal);
 
        if (!profile_handoff_task(tsk))
                free_task(tsk);
@@ -864,8 +877,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        if (!sig)
                return -ENOMEM;
 
-       atomic_set(&sig->count, 1);
+       sig->nr_threads = 1;
        atomic_set(&sig->live, 1);
+       atomic_set(&sig->sigcnt, 1);
        init_waitqueue_head(&sig->wait_chldexit);
        if (clone_flags & CLONE_NEWPID)
                sig->flags |= SIGNAL_UNKILLABLE;
@@ -889,13 +903,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
-void __cleanup_signal(struct signal_struct *sig)
-{
-       thread_group_cputime_free(sig);
-       tty_kref_put(sig->tty);
-       kmem_cache_free(signal_cachep, sig);
-}
-
 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
@@ -1079,6 +1086,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
        mpol_fix_fork_child_flag(p);
 #endif
+#ifdef CONFIG_CPUSETS
+       p->cpuset_mem_spread_rotor = node_random(p->mems_allowed);
+       p->cpuset_slab_spread_rotor = node_random(p->mems_allowed);
+#endif
 #ifdef CONFIG_TRACE_IRQFLAGS
        p->irq_events = 0;
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
@@ -1245,8 +1256,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        if (clone_flags & CLONE_THREAD) {
-               atomic_inc(&current->signal->count);
+               current->signal->nr_threads++;
                atomic_inc(&current->signal->live);
+               atomic_inc(&current->signal->sigcnt);
                p->group_leader = current->group_leader;
                list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
        }
@@ -1259,7 +1271,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                                p->nsproxy->pid_ns->child_reaper = p;
 
                        p->signal->leader_pid = pid;
-                       tty_kref_put(p->signal->tty);
                        p->signal->tty = tty_kref_get(current->signal->tty);
                        attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                        attach_pid(p, PIDTYPE_SID, task_session(current));
@@ -1292,7 +1303,7 @@ bad_fork_cleanup_mm:
                mmput(p->mm);
 bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
-               __cleanup_signal(p->signal);
+               free_signal_struct(p->signal);
 bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
@@ -1327,6 +1338,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_re
        return regs;
 }
 
+static inline void init_idle_pids(struct pid_link *links)
+{
+       enum pid_type type;
+
+       for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+               INIT_HLIST_NODE(&links[type].node); /* not really needed */
+               links[type].pid = &init_struct_pid;
+       }
+}
+
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
        struct task_struct *task;
@@ -1334,8 +1355,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 
        task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
                            &init_struct_pid, 0);
-       if (!IS_ERR(task))
+       if (!IS_ERR(task)) {
+               init_idle_pids(task->pids);
                init_idle(task, cpu);
+       }
 
        return task;
 }
@@ -1507,14 +1530,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
                *flags_ptr |= CLONE_SIGHAND;
 
        /*
-        * If unsharing signal handlers and the task was created
-        * using CLONE_THREAD, then must unshare the thread
-        */
-       if ((*flags_ptr & CLONE_SIGHAND) &&
-           (atomic_read(&current->signal->count) > 1))
-               *flags_ptr |= CLONE_THREAD;
-
-       /*
         * If unsharing namespace, must also unshare filesystem information.
         */
        if (*flags_ptr & CLONE_NEWNS)
index bf0e231..6e9b196 100644
@@ -116,27 +116,16 @@ int __request_module(bool wait, const char *fmt, ...)
 
        trace_module_request(module_name, wait, _RET_IP_);
 
-       ret = call_usermodehelper(modprobe_path, argv, envp,
-                       wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
+       ret = call_usermodehelper_fns(modprobe_path, argv, envp,
+                       wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC,
+                       NULL, NULL, NULL);
+
        atomic_dec(&kmod_concurrent);
        return ret;
 }
 EXPORT_SYMBOL(__request_module);
 #endif /* CONFIG_MODULES */
 
-struct subprocess_info {
-       struct work_struct work;
-       struct completion *complete;
-       struct cred *cred;
-       char *path;
-       char **argv;
-       char **envp;
-       enum umh_wait wait;
-       int retval;
-       struct file *stdin;
-       void (*cleanup)(char **argv, char **envp);
-};
-
 /*
  * This is the task which runs the usermode application
  */
@@ -145,36 +134,10 @@ static int ____call_usermodehelper(void *data)
        struct subprocess_info *sub_info = data;
        int retval;
 
-       BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
-
-       /* Unblock all signals */
        spin_lock_irq(&current->sighand->siglock);
        flush_signal_handlers(current, 1);
-       sigemptyset(&current->blocked);
-       recalc_sigpending();
        spin_unlock_irq(&current->sighand->siglock);
 
-       /* Install the credentials */
-       commit_creds(sub_info->cred);
-       sub_info->cred = NULL;
-
-       /* Install input pipe when needed */
-       if (sub_info->stdin) {
-               struct files_struct *f = current->files;
-               struct fdtable *fdt;
-               /* no races because files should be private here */
-               sys_close(0);
-               fd_install(0, sub_info->stdin);
-               spin_lock(&f->file_lock);
-               fdt = files_fdtable(f);
-               FD_SET(0, fdt->open_fds);
-               FD_CLR(0, fdt->close_on_exec);
-               spin_unlock(&f->file_lock);
-
-               /* and disallow core files too */
-               current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
-       }
-
        /* We can run anywhere, unlike our parent keventd(). */
        set_cpus_allowed_ptr(current, cpu_all_mask);
 
@@ -184,9 +147,16 @@ static int ____call_usermodehelper(void *data)
         */
        set_user_nice(current, 0);
 
+       if (sub_info->init) {
+               retval = sub_info->init(sub_info);
+               if (retval)
+                       goto fail;
+       }
+
        retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
 
        /* Exec failed? */
+fail:
        sub_info->retval = retval;
        do_exit(0);
 }
@@ -194,9 +164,7 @@ static int ____call_usermodehelper(void *data)
 void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
        if (info->cleanup)
-               (*info->cleanup)(info->argv, info->envp);
-       if (info->cred)
-               put_cred(info->cred);
+               (*info->cleanup)(info);
        kfree(info);
 }
 EXPORT_SYMBOL(call_usermodehelper_freeinfo);
@@ -207,16 +175,16 @@ static int wait_for_helper(void *data)
        struct subprocess_info *sub_info = data;
        pid_t pid;
 
-       /* Install a handler: if SIGCLD isn't handled sys_wait4 won't
-        * populate the status, but will return -ECHILD. */
-       allow_signal(SIGCHLD);
+       /* If SIGCLD is ignored sys_wait4 won't populate the status. */
+       spin_lock_irq(&current->sighand->siglock);
+       current->sighand->action[SIGCHLD-1].sa.sa_handler = SIG_DFL;
+       spin_unlock_irq(&current->sighand->siglock);
 
        pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
        if (pid < 0) {
                sub_info->retval = pid;
        } else {
-               int ret;
-
+               int ret = -ECHILD;
                /*
                 * Normally it is bogus to call wait4() from in-kernel because
                 * wait4() wants to write the exit code to a userspace address.
@@ -237,10 +205,7 @@ static int wait_for_helper(void *data)
                        sub_info->retval = ret;
        }
 
-       if (sub_info->wait == UMH_NO_WAIT)
-               call_usermodehelper_freeinfo(sub_info);
-       else
-               complete(sub_info->complete);
+       complete(sub_info->complete);
        return 0;
 }
 
@@ -249,15 +214,13 @@ static void __call_usermodehelper(struct work_struct *work)
 {
        struct subprocess_info *sub_info =
                container_of(work, struct subprocess_info, work);
-       pid_t pid;
        enum umh_wait wait = sub_info->wait;
-
-       BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+       pid_t pid;
 
        /* CLONE_VFORK: wait until the usermode helper has execve'd
         * successfully We need the data structures to stay around
         * until that is done.  */
-       if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT)
+       if (wait == UMH_WAIT_PROC)
                pid = kernel_thread(wait_for_helper, sub_info,
                                    CLONE_FS | CLONE_FILES | SIGCHLD);
        else
@@ -266,15 +229,16 @@ static void __call_usermodehelper(struct work_struct *work)
 
        switch (wait) {
        case UMH_NO_WAIT:
+               call_usermodehelper_freeinfo(sub_info);
                break;
 
        case UMH_WAIT_PROC:
                if (pid > 0)
                        break;
-               sub_info->retval = pid;
                /* FALLTHROUGH */
-
        case UMH_WAIT_EXEC:
+               if (pid < 0)
+                       sub_info->retval = pid;
                complete(sub_info->complete);
        }
 }
@@ -376,80 +340,37 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
        sub_info->path = path;
        sub_info->argv = argv;
        sub_info->envp = envp;
-       sub_info->cred = prepare_usermodehelper_creds();
-       if (!sub_info->cred) {
-               kfree(sub_info);
-               return NULL;
-       }
-
   out:
        return sub_info;
 }
 EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
- * call_usermodehelper_setkeys - set the session keys for usermode helper
- * @info: a subprocess_info returned by call_usermodehelper_setup
- * @session_keyring: the session keyring for the process
- */
-void call_usermodehelper_setkeys(struct subprocess_info *info,
-                                struct key *session_keyring)
-{
-#ifdef CONFIG_KEYS
-       struct thread_group_cred *tgcred = info->cred->tgcred;
-       key_put(tgcred->session_keyring);
-       tgcred->session_keyring = key_get(session_keyring);
-#else
-       BUG();
-#endif
-}
-EXPORT_SYMBOL(call_usermodehelper_setkeys);
-
-/**
- * call_usermodehelper_setcleanup - set a cleanup function
+ * call_usermodehelper_setfns - set a cleanup/init function
  * @info: a subprocess_info returned by call_usermodehelper_setup
  * @cleanup: a cleanup function
+ * @init: an init function
+ * @data: arbitrary context sensitive data
  *
- * The cleanup function is just befor ethe subprocess_info is about to
+ * The init function is used to customize the helper process prior to
+ * exec.  A non-zero return code causes the process to error out, exit,
+ * and return the failure to the calling process.
+ *
+ * The cleanup function is called just before the subprocess_info is about to
  * be freed.  This can be used for freeing the argv and envp.  The
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
-void call_usermodehelper_setcleanup(struct subprocess_info *info,
-                                   void (*cleanup)(char **argv, char **envp))
+void call_usermodehelper_setfns(struct subprocess_info *info,
+                   int (*init)(struct subprocess_info *info),
+                   void (*cleanup)(struct subprocess_info *info),
+                   void *data)
 {
        info->cleanup = cleanup;
+       info->init = init;
+       info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setcleanup);
-
-/**
- * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
- * @sub_info: a subprocess_info returned by call_usermodehelper_setup
- * @filp: set to the write-end of a pipe
- *
- * This constructs a pipe, and sets the read end to be the stdin of the
- * subprocess, and returns the write-end in *@filp.
- */
-int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
-                                 struct file **filp)
-{
-       struct file *f;
-
-       f = create_write_pipe(0);
-       if (IS_ERR(f))
-               return PTR_ERR(f);
-       *filp = f;
-
-       f = create_read_pipe(f, 0);
-       if (IS_ERR(f)) {
-               free_write_pipe(*filp);
-               return PTR_ERR(f);
-       }
-       sub_info->stdin = f;
-
-       return 0;
-}
-EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
+EXPORT_SYMBOL(call_usermodehelper_setfns);
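
Together with call_usermodehelper_setup() and call_usermodehelper_exec(), this
hook pair subsumes the removed pipe and keyring helpers. A sketch of the
resulting idiom; example_init is a hypothetical callback that runs in the
helper process before exec, where a non-zero return aborts the helper:

	static int example_init(struct subprocess_info *info)
	{
		/* e.g. install fd 0, adjust rlimits, ... */
		return 0;
	}

	sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
	if (sub_info == NULL)
		return -ENOMEM;
	call_usermodehelper_setfns(sub_info, example_init, NULL, NULL);
	return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);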
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -469,9 +390,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
        DECLARE_COMPLETION_ONSTACK(done);
        int retval = 0;
 
-       BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
-       validate_creds(sub_info->cred);
-
        helper_lock();
        if (sub_info->path[0] == '\0')
                goto out;
@@ -498,41 +416,6 @@ unlock:
 }
 EXPORT_SYMBOL(call_usermodehelper_exec);
 
-/**
- * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
- * @path: path to usermode executable
- * @argv: arg vector for process
- * @envp: environment for process
- * @filp: set to the write-end of a pipe
- *
- * This is a simple wrapper which executes a usermode-helper function
- * with a pipe as stdin.  It is implemented entirely in terms of
- * lower-level call_usermodehelper_* functions.
- */
-int call_usermodehelper_pipe(char *path, char **argv, char **envp,
-                            struct file **filp)
-{
-       struct subprocess_info *sub_info;
-       int ret;
-
-       sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
-       if (sub_info == NULL)
-               return -ENOMEM;
-
-       ret = call_usermodehelper_stdinpipe(sub_info, filp);
-       if (ret < 0) {
-               call_usermodehelper_freeinfo(sub_info);
-               return ret;
-       }
-
-       ret = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
-       if (ret < 0)    /* Failed to execute helper, close pipe */
-               filp_close(*filp, NULL);
-
-       return ret;
-}
-EXPORT_SYMBOL(call_usermodehelper_pipe);
-
 void __init usermodehelper_init(void)
 {
        khelper_wq = create_singlethread_workqueue("khelper");
index b1c9857..fdd8ae6 100644
@@ -659,7 +659,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
                err = __padata_add_cpu(pinst, cpu);
                mutex_unlock(&pinst->lock);
                if (err)
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(err);
                break;
 
        case CPU_DOWN_PREPARE:
@@ -670,7 +670,7 @@ static int padata_cpu_callback(struct notifier_block *nfb,
                err = __padata_remove_cpu(pinst, cpu);
                mutex_unlock(&pinst->lock);
                if (err)
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(err);
                break;
 
        case CPU_UP_CANCELED:
index dbe13db..3b16cd9 100644
@@ -87,6 +87,7 @@ NORET_TYPE void panic(const char * fmt, ...)
         */
        preempt_disable();
 
+       console_verbose();
        bust_spinlocks(1);
        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
index aebb30d..e9fd8c1 100644
@@ -513,6 +513,13 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
+       /* bump default and minimum pid_max based on number of cpus */
+       pid_max = min(pid_max_max, max_t(int, pid_max,
+                               PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
+       pid_max_min = max_t(int, pid_max_min,
+                               PIDS_PER_CPU_MIN * num_possible_cpus());
+       pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+
        init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        /* Reserve PID 0. We never call free_pidmap(0) */
        set_bit(0, init_pid_ns.pidmap[0].page);
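
A worked example of the new clamp, assuming the usual defaults (pid_max =
32768, pid_max_min = 301, pid_max_max = PID_MAX_LIMIT): with 16 possible CPUs,
1024 * 16 = 16384 < 32768, so pid_max keeps its default; with 64 CPUs,
1024 * 64 = 65536 becomes the new default, and the minimum rises to
max(301, 8 * 64) = 512.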
index 00bb252..9829646 100644
@@ -363,7 +363,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
                                }
                        } else {
                                read_lock(&tasklist_lock);
-                               if (thread_group_leader(p) && p->signal) {
+                               if (thread_group_leader(p) && p->sighand) {
                                        error =
                                            cpu_clock_sample_group(which_clock,
                                                                   p, &rtn);
@@ -439,7 +439,7 @@ int posix_cpu_timer_del(struct k_itimer *timer)
 
        if (likely(p != NULL)) {
                read_lock(&tasklist_lock);
-               if (unlikely(p->signal == NULL)) {
+               if (unlikely(p->sighand == NULL)) {
                        /*
                         * We raced with the reaping of the task.
                         * The deletion should have cleared us off the list.
@@ -691,10 +691,10 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
        read_lock(&tasklist_lock);
        /*
         * We need the tasklist_lock to protect against reaping that
-        * clears p->signal.  If p has just been reaped, we can no
+        * clears p->sighand.  If p has just been reaped, we can no
         * longer get any information about it at all.
         */
-       if (unlikely(p->signal == NULL)) {
+       if (unlikely(p->sighand == NULL)) {
                read_unlock(&tasklist_lock);
                put_task_struct(p);
                timer->it.cpu.task = NULL;
@@ -863,7 +863,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
                clear_dead = p->exit_state;
        } else {
                read_lock(&tasklist_lock);
-               if (unlikely(p->signal == NULL)) {
+               if (unlikely(p->sighand == NULL)) {
                        /*
                         * The process has been reaped.
                         * We can't even collect a sample any more.
@@ -1199,7 +1199,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                spin_lock(&p->sighand->siglock);
        } else {
                read_lock(&tasklist_lock);
-               if (unlikely(p->signal == NULL)) {
+               if (unlikely(p->sighand == NULL)) {
                        /*
                         * The process has been reaped.
                         * We can't even collect a sample any more.
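
Editor's note: the p->signal to p->sighand switch repeated through this file tracks a lifetime change in this series. As ->signal is being made stable for the task's whole life, the reliable under-tasklist_lock indicator that a task has been released is now ->sighand == NULL, which is cleared at the point where ->signal used to be.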
index dfadc5b..b22a899 100644 (file)
@@ -365,14 +365,14 @@ static int __cpuinit profile_cpu_callback(struct notifier_block *info,
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               node = cpu_to_node(cpu);
+               node = cpu_to_mem(cpu);
                per_cpu(cpu_profile_flip, cpu) = 0;
                if (!per_cpu(cpu_profile_hits, cpu)[1]) {
                        page = alloc_pages_exact_node(node,
                                        GFP_KERNEL | __GFP_ZERO,
                                        0);
                        if (!page)
-                               return NOTIFY_BAD;
+                               return notifier_from_errno(-ENOMEM);
                        per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
                }
                if (!per_cpu(cpu_profile_hits, cpu)[0]) {
@@ -388,7 +388,7 @@ out_free:
                page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
                per_cpu(cpu_profile_hits, cpu)[1] = NULL;
                __free_page(page);
-               return NOTIFY_BAD;
+               return notifier_from_errno(-ENOMEM);
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                if (prof_cpu_mask != NULL)
@@ -567,7 +567,7 @@ static int create_hash_tables(void)
        int cpu;
 
        for_each_online_cpu(cpu) {
-               int node = cpu_to_node(cpu);
+               int node = cpu_to_mem(cpu);
                struct page *page;
 
                page = alloc_pages_exact_node(node,
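
Editor's note: cpu_to_mem() differs from cpu_to_node() only on configurations with memoryless nodes. It resolves to the nearest node that actually has memory, so the alloc_pages_exact_node() calls above are never aimed at a node where the allocation must immediately fall back.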
index 6af9cdd..74a3d69 100644 (file)
@@ -594,6 +594,32 @@ int ptrace_request(struct task_struct *child, long request,
                ret = ptrace_detach(child, data);
                break;
 
+#ifdef CONFIG_BINFMT_ELF_FDPIC
+       case PTRACE_GETFDPIC: {
+               struct mm_struct *mm = get_task_mm(child);
+               unsigned long tmp = 0;
+
+               ret = -ESRCH;
+               if (!mm)
+                       break;
+
+               switch (addr) {
+               case PTRACE_GETFDPIC_EXEC:
+                       tmp = mm->context.exec_fdpic_loadmap;
+                       break;
+               case PTRACE_GETFDPIC_INTERP:
+                       tmp = mm->context.interp_fdpic_loadmap;
+                       break;
+               default:
+                       break;
+               }
+               mmput(mm);
+
+               ret = put_user(tmp, (unsigned long __user *) data);
+               break;
+       }
+#endif
+
 #ifdef PTRACE_SINGLESTEP
        case PTRACE_SINGLESTEP:
 #endif
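
Editor's note, a userspace sketch of the now-generic request, assuming an FDPIC target (FRV or Blackfin at this point). The PTRACE_GETFDPIC* values mirror the arch ptrace headers; pid and loadmap are illustrative. The selector travels in addr, and the kernel stores the result through data:

	#include <sys/ptrace.h>
	#include <sys/types.h>

	#ifndef PTRACE_GETFDPIC
	#define PTRACE_GETFDPIC		31	/* per the FDPIC arch headers */
	#define PTRACE_GETFDPIC_EXEC	0	/* addr: request the executable loadmap */
	#define PTRACE_GETFDPIC_INTERP	1	/* addr: request the interpreter loadmap */
	#endif

	/* Read the tracee's executable loadmap pointer; the tracee must be stopped. */
	static long get_exec_loadmap(pid_t pid, unsigned long *loadmap)
	{
		return ptrace(PTRACE_GETFDPIC, pid,
			      (void *)PTRACE_GETFDPIC_EXEC, loadmap);
	}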
index 4268287..c7cf397 100644 (file)
@@ -539,7 +539,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
                                        "relay_hotcpu_callback: cpu %d buffer "
                                        "creation failed\n", hotcpu);
                                mutex_unlock(&relay_channels_mutex);
-                               return NOTIFY_BAD;
+                               return notifier_from_errno(-ENOMEM);
                        }
                }
                mutex_unlock(&relay_channels_mutex);
index 054a601..15b93f6 100644 (file)
@@ -969,14 +969,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
        }
 }
 
-void task_rq_unlock_wait(struct task_struct *p)
-{
-       struct rq *rq = task_rq(p);
-
-       smp_mb(); /* spin-unlock-wait is not a full memory barrier */
-       raw_spin_unlock_wait(&rq->lock);
-}
-
 static void __task_rq_unlock(struct rq *rq)
        __releases(rq->lock)
 {
index 87a330a..3556539 100644 (file)
@@ -381,15 +381,9 @@ __initcall(init_sched_debug_procfs);
 void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 {
        unsigned long nr_switches;
-       unsigned long flags;
-       int num_threads = 1;
-
-       if (lock_task_sighand(p, &flags)) {
-               num_threads = atomic_read(&p->signal->count);
-               unlock_task_sighand(p, &flags);
-       }
 
-       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads);
+       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
+                                               get_nr_threads(p));
        SEQ_printf(m,
                "---------------------------------------------------------\n");
 #define __P(F) \
index 825a3f2..906ae5a 100644 (file)
@@ -642,7 +642,7 @@ static inline bool si_fromuser(const struct siginfo *info)
 static int check_kill_permission(int sig, struct siginfo *info,
                                 struct task_struct *t)
 {
-       const struct cred *cred = current_cred(), *tcred;
+       const struct cred *cred, *tcred;
        struct pid *sid;
        int error;
 
@@ -656,8 +656,10 @@ static int check_kill_permission(int sig, struct siginfo *info,
        if (error)
                return error;
 
+       cred = current_cred();
        tcred = __task_cred(t);
-       if ((cred->euid ^ tcred->suid) &&
+       if (!same_thread_group(current, t) &&
+           (cred->euid ^ tcred->suid) &&
            (cred->euid ^ tcred->uid) &&
            (cred->uid  ^ tcred->suid) &&
            (cred->uid  ^ tcred->uid) &&
@@ -1083,23 +1085,24 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 /*
  * Nuke all other threads in the group.
  */
-void zap_other_threads(struct task_struct *p)
+int zap_other_threads(struct task_struct *p)
 {
-       struct task_struct *t;
+       struct task_struct *t = p;
+       int count = 0;
 
        p->signal->group_stop_count = 0;
 
-       for (t = next_thread(p); t != p; t = next_thread(t)) {
-               /*
-                * Don't bother with already dead threads
-                */
+       while_each_thread(p, t) {
+               count++;
+
+               /* Don't bother with already dead threads */
                if (t->exit_state)
                        continue;
-
-               /* SIGKILL will be handled before any pending SIGSTOP */
                sigaddset(&t->pending.signal, SIGKILL);
                signal_wake_up(t, 1);
        }
+
+       return count;
 }
 
 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
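
Editor's note: while_each_thread() is defined in include/linux/sched.h of this era roughly as below; starting from t = p it visits every thread in the group except p itself, so the new return value counts the other threads, dead or alive:

	#define while_each_thread(g, t) \
		while ((t = next_thread(t)) != g)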
index 3fc6973..75c970c 100644 (file)
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
        case CPU_UP_PREPARE_FROZEN:
                if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
                                cpu_to_node(cpu)))
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
                break;
 
 #ifdef CONFIG_HOTPLUG_CPU
index 0db913a..825e112 100644 (file)
@@ -808,7 +808,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
                p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(PTR_ERR(p));
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
index 0d36d88..e83ddbb 100644 (file)
@@ -1632,9 +1632,9 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
 
 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
 
-static void argv_cleanup(char **argv, char **envp)
+static void argv_cleanup(struct subprocess_info *info)
 {
-       argv_free(argv);
+       argv_free(info->argv);
 }
 
 /**
@@ -1668,7 +1668,7 @@ int orderly_poweroff(bool force)
                goto out;
        }
 
-       call_usermodehelper_setcleanup(info, argv_cleanup);
+       call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
 
        ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
 
index be394af..e3b8c69 100644 (file)
@@ -1680,11 +1680,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
        long cpu = (long)hcpu;
+       int err;
+
        switch(action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               if (init_timers_cpu(cpu) < 0)
-                       return NOTIFY_BAD;
+               err = init_timers_cpu(cpu);
+               if (err < 0)
+                       return notifier_from_errno(err);
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
index 77dabbf..327d2de 100644 (file)
@@ -1110,7 +1110,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
        unsigned int cpu = (unsigned long)hcpu;
        struct cpu_workqueue_struct *cwq;
        struct workqueue_struct *wq;
-       int ret = NOTIFY_OK;
+       int err = 0;
 
        action &= ~CPU_TASKS_FROZEN;
 
@@ -1124,12 +1124,13 @@ undo:
 
                switch (action) {
                case CPU_UP_PREPARE:
-                       if (!create_workqueue_thread(cwq, cpu))
+                       err = create_workqueue_thread(cwq, cpu);
+                       if (!err)
                                break;
                        printk(KERN_ERR "workqueue [%s] for %i failed\n",
                                wq->name, cpu);
                        action = CPU_UP_CANCELED;
-                       ret = NOTIFY_BAD;
+                       err = -ENOMEM;
                        goto undo;
 
                case CPU_ONLINE:
@@ -1150,7 +1151,7 @@ undo:
                cpumask_clear_cpu(cpu, cpu_populated_map);
        }
 
-       return ret;
+       return notifier_from_errno(err);
 }
 
 #ifdef CONFIG_SMP
index 2312089..e722e9d 100644 (file)
@@ -898,6 +898,18 @@ config LKDTM
        Documentation on how to use the module can be found in
        Documentation/fault-injection/provoke-crashes.txt
 
+config CPU_NOTIFIER_ERROR_INJECT
+       tristate "CPU notifier error injection module"
+       depends on HOTPLUG_CPU && DEBUG_KERNEL
+       help
+         This option provides a kernel module that can be used to test
+         the error handling of the CPU notifiers.
+
+         To compile this code as a module, choose M here: the module will
+         be called cpu-notifier-error-inject.
+
+         If unsure, say N.
+
 config FAULT_INJECTION
        bool "Fault-injection framework"
        depends on DEBUG_KERNEL
index 9e6d3c2..c8567a5 100644 (file)
@@ -85,6 +85,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
index ffb78c9..d7137e7 100644 (file)
@@ -672,7 +672,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
  *
  * The bit positions 0 through @bits are valid positions in @buf.
  */
-static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
+int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
 {
        int pos = 0;
 
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644 (file)
index 0000000..4dc2032
--- /dev/null
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+static int priority;
+static int cpu_up_prepare_error;
+static int cpu_down_prepare_error;
+
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+
+module_param(cpu_up_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_up_prepare_error,
+               "specify error code to inject on CPU_UP_PREPARE action");
+
+module_param(cpu_down_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_down_prepare_error,
+               "specify error code to inject on CPU_DOWN_PREPARE action");
+
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+                               unsigned long action, void *hcpu)
+{
+       int err = 0;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               err = cpu_up_prepare_error;
+               break;
+       case CPU_DOWN_PREPARE:
+       case CPU_DOWN_PREPARE_FROZEN:
+               err = cpu_down_prepare_error;
+               break;
+       }
+       if (err)
+               printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
+
+       return notifier_from_errno(err);
+}
+
+static struct notifier_block err_inject_cpu_notifier = {
+       .notifier_call = err_inject_cpu_callback,
+};
+
+static int err_inject_init(void)
+{
+       err_inject_cpu_notifier.priority = priority;
+
+       return register_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+static void err_inject_exit(void)
+{
+       unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("CPU notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
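
Editor's note on typical use (sysfs paths assumed from the 0644 permissions above; the module name's dashes appear as underscores under /sys/module): load the module with modprobe cpu-notifier-error-inject, write a negative errno such as -12 (-ENOMEM) to /sys/module/cpu_notifier_error_inject/parameters/cpu_down_prepare_error, then write 0 to /sys/devices/system/cpu/cpuN/online and confirm the offline attempt fails with the injected error.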
index 422a9d5..c1a2069 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -445,6 +445,7 @@ EXPORT_SYMBOL(idr_remove);
 void idr_remove_all(struct idr *idp)
 {
        int n, id, max;
+       int bt_mask;
        struct idr_layer *p;
        struct idr_layer *pa[MAX_LEVEL];
        struct idr_layer **paa = &pa[0];
@@ -462,8 +463,10 @@ void idr_remove_all(struct idr *idp)
                        p = p->ary[(id >> n) & IDR_MASK];
                }
 
+               bt_mask = id;
                id += 1 << n;
-               while (n < fls(id)) {
+               /* Get the highest bit that the above add changed from 0->1. */
+               while (n < fls(id ^ bt_mask)) {
                        if (p)
                                free_layer(p);
                        n += IDR_BITS;
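
Editor's note, a concrete example of the fix: going from bt_mask = 0x1f to id = 0x20, the xor is 0x3f and fls() = 6, so backtracking climbs through every level whose index bits the carry rippled past. Going from 0x20 to 0x21, the xor is 0x1 and fls() = 1, so only the bottom level is a candidate. The old test, fls(id), looked at the absolute value of the new id, so an id like 0x21 (fls = 6) kept backtracking and freed layers that still held live entries.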
index 2a087e0..05da38b 100644 (file)
@@ -656,7 +656,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
  *
  *     Returns: the index of the hole if found, otherwise returns an index
  *     outside of the set specified (in which case 'index - return >= max_scan'
- *     will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *     will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
  *
  *     radix_tree_next_hole may be called under rcu_read_lock. However, like
  *     radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -674,7 +674,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
                if (!radix_tree_lookup(root, index))
                        break;
                index--;
-               if (index == LONG_MAX)
+               if (index == ULONG_MAX)
                        break;
        }
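
Editor's note: the sentinel matters because index is an unsigned long. Scanning downward past 0, index-- wraps to ULONG_MAX, so ULONG_MAX (not LONG_MAX) is the value the loop can actually produce and the value callers must test for.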
 
index 217d5c4..870dc3f 100644 (file)
 #include <linux/jiffies.h>
 #include <linux/random.h>
 
-struct rnd_state {
-       u32 s1, s2, s3;
-};
-
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
-static u32 __random32(struct rnd_state *state)
+/**
+ *     prandom32 - seeded pseudo-random number generator.
+ *     @state: pointer to state structure holding seeded state.
+ *
+ *     This is used for pseudo-randomness with no outside seeding.
+ *     For more random results, use random32().
+ */
+u32 prandom32(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
 
        return (state->s1 ^ state->s2 ^ state->s3);
 }
-
-/*
- * Handle minimum values for seeds
- */
-static inline u32 __seed(u32 x, u32 m)
-{
-       return (x < m) ? x + m : x;
-}
+EXPORT_SYMBOL(prandom32);
 
 /**
  *     random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
 {
        unsigned long r;
        struct rnd_state *state = &get_cpu_var(net_rand_state);
-       r = __random32(state);
+       r = prandom32(state);
        put_cpu_var(state);
        return r;
 }
@@ -118,12 +114,12 @@ static int __init random32_init(void)
                state->s3 = __seed(LCG(state->s2), 15);
 
                /* "warm it up" */
-               __random32(state);
-               __random32(state);
-               __random32(state);
-               __random32(state);
-               __random32(state);
-               __random32(state);
+               prandom32(state);
+               prandom32(state);
+               prandom32(state);
+               prandom32(state);
+               prandom32(state);
+               prandom32(state);
        }
        return 0;
 }
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
                state->s3 = __seed(seeds[2], 15);
 
                /* mix it in */
-               __random32(state);
+               prandom32(state);
        }
        return 0;
 }
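
Editor's note: with prandom32() exported and struct rnd_state plus the __seed() helper now visible via <linux/random.h> (as this hunk implies), a driver can run a private generator. A minimal sketch; the names and seed arithmetic are illustrative, not part of the exported API:

	#include <linux/random.h>

	static struct rnd_state my_rnd;	/* hypothetical per-driver state */

	static void my_rnd_seed(u32 seed)
	{
		/* __seed() enforces the per-word minimum values (1, 7, 15). */
		my_rnd.s1 = __seed(seed, 1);
		my_rnd.s2 = __seed(seed, 7);
		my_rnd.s3 = __seed(seed, 15);

		/* "warm it up", as random32_init() does above */
		prandom32(&my_rnd);
		prandom32(&my_rnd);
		prandom32(&my_rnd);
	}

	static u32 my_rnd_next(void)
	{
		return prandom32(&my_rnd);
	}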
index 5fddf72..a009055 100644 (file)
@@ -757,37 +757,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
 
 /*
- * Same as above, but for a sub-range of the mapping.
- */
-static void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-                         unsigned long offset, size_t size,
-                         int dir, int target)
-{
-       swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
-}
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-                                 unsigned long offset, size_t size,
-                                 enum dma_data_direction dir)
-{
-       swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-                                 SYNC_FOR_CPU);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-                                    unsigned long offset, size_t size,
-                                    enum dma_data_direction dir)
-{
-       swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-                                 SYNC_FOR_DEVICE);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-
-/*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
  * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
index 88d7196..45a2d18 100644 (file)
@@ -1105,6 +1105,12 @@ page_not_up_to_date_locked:
                }
 
 readpage:
+               /*
+                * A previous I/O error may have been due to temporary
+                * failures, e.g. multipath errors.
+                * PG_error will be set again if readpage fails.
+                */
+               ClearPageError(page);
                /* Start the actual read. The read will unlock the page. */
                error = mapping->a_ops->readpage(filp, page);
 
@@ -1269,7 +1275,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
        struct file *filp = iocb->ki_filp;
        ssize_t retval;
-       unsigned long seg;
+       unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
 
@@ -1296,21 +1302,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                                retval = mapping->a_ops->direct_IO(READ, iocb,
                                                        iov, pos, nr_segs);
                        }
-                       if (retval > 0)
+                       if (retval > 0) {
                                *ppos = pos + retval;
-                       if (retval) {
+                               count -= retval;
+                       }
+
+                       /*
+                        * Btrfs can have a short DIO read if we encounter
+                        * compressed extents, so if there was an error, or if
+                        * we've already read everything we wanted to, or if
+                        * there was a short read because we hit EOF, go ahead
+                        * and return.  Otherwise fall through to buffered I/O for
+                        * the rest of the read.
+                        */
+                       if (retval < 0 || !count || *ppos >= size) {
                                file_accessed(filp);
                                goto out;
                        }
                }
        }
 
+       count = retval;
        for (seg = 0; seg < nr_segs; seg++) {
                read_descriptor_t desc;
+               loff_t offset = 0;
+
+               /*
+                * If we did a short DIO read we need to skip the section of the
+                * iov that we've already read data into.
+                */
+               if (count) {
+                       if (count > iov[seg].iov_len) {
+                               count -= iov[seg].iov_len;
+                               continue;
+                       }
+                       offset = count;
+                       count = 0;
+               }
 
                desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base;
-               desc.count = iov[seg].iov_len;
+               desc.arg.buf = iov[seg].iov_base + offset;
+               desc.count = iov[seg].iov_len - offset;
                if (desc.count == 0)
                        continue;
                desc.error = 0;
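
Editor's note, a worked example of the skip logic: if the direct read returned 5000 bytes against two 4096-byte iovecs, count starts at 5000. Seg 0 satisfies count > iov_len and is skipped entirely (count becomes 904), and seg 1 begins the buffered read at offset 904 with desc.count = 4096 - 904 = 3192, exactly where the short DIO read stopped.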
index c8569bc..c6ece0a 100644 (file)
@@ -149,16 +149,35 @@ struct mem_cgroup_threshold {
        u64 threshold;
 };
 
+/* For thresholds */
 struct mem_cgroup_threshold_ary {
        /* An array index points to threshold just below usage. */
-       atomic_t current_threshold;
+       int current_threshold;
        /* Size of entries[] */
        unsigned int size;
        /* Array of thresholds */
        struct mem_cgroup_threshold entries[0];
 };
 
+struct mem_cgroup_thresholds {
+       /* Primary thresholds array */
+       struct mem_cgroup_threshold_ary *primary;
+       /*
+        * Spare threshold array.
+        * This is needed to make mem_cgroup_unregister_event() "never fail".
+        * It must be able to store at least primary->size - 1 entries.
+        */
+       struct mem_cgroup_threshold_ary *spare;
+};
+
+/* for OOM */
+struct mem_cgroup_eventfd_list {
+       struct list_head list;
+       struct eventfd_ctx *eventfd;
+};
+
 static void mem_cgroup_threshold(struct mem_cgroup *mem);
+static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
 
 /*
  * The memory controller data structure. The memory controller controls both
@@ -207,6 +226,8 @@ struct mem_cgroup {
        atomic_t        refcnt;
 
        unsigned int    swappiness;
+       /* OOM-Killer disable */
+       int             oom_kill_disable;
 
        /* set when res.limit == memsw.limit */
        bool            memsw_is_minimum;
@@ -215,17 +236,19 @@ struct mem_cgroup {
        struct mutex thresholds_lock;
 
        /* thresholds for memory usage. RCU-protected */
-       struct mem_cgroup_threshold_ary *thresholds;
+       struct mem_cgroup_thresholds thresholds;
 
        /* thresholds for mem+swap usage. RCU-protected */
-       struct mem_cgroup_threshold_ary *memsw_thresholds;
+       struct mem_cgroup_thresholds memsw_thresholds;
+
+       /* For oom notifier event fd */
+       struct list_head oom_notify;
 
        /*
         * Should we move charges of a task when a task is moved into this
         * mem_cgroup ? And what type of charges should we move ?
         */
        unsigned long   move_charge_at_immigrate;
-
        /*
         * percpu counter.
         */
@@ -239,6 +262,7 @@ struct mem_cgroup {
  */
 enum move_type {
        MOVE_CHARGE_TYPE_ANON,  /* private anonymous page and swap of it */
+       MOVE_CHARGE_TYPE_FILE,  /* file page(including tmpfs) and swap of it */
        NR_MOVE_TYPE,
 };
 
@@ -255,6 +279,18 @@ static struct move_charge_struct {
        .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
 
+static bool move_anon(void)
+{
+       return test_bit(MOVE_CHARGE_TYPE_ANON,
+                                       &mc.to->move_charge_at_immigrate);
+}
+
+static bool move_file(void)
+{
+       return test_bit(MOVE_CHARGE_TYPE_FILE,
+                                       &mc.to->move_charge_at_immigrate);
+}
+
 /*
  * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
  * limit reclaim to prevent infinite loops, if they ever occur.
@@ -282,9 +318,12 @@ enum charge_type {
 /* for encoding cft->private value on file */
 #define _MEM                   (0)
 #define _MEMSWAP               (1)
+#define _OOM_TYPE              (2)
 #define MEMFILE_PRIVATE(x, val)        (((x) << 16) | (val))
 #define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
 #define MEMFILE_ATTR(val)      ((val) & 0xffff)
+/* Used for OOM notifier */
+#define OOM_CONTROL            (0)
 
 /*
  * Reclaim flags for mem_cgroup_hierarchical_reclaim
@@ -1293,14 +1332,62 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
 static DEFINE_MUTEX(memcg_oom_mutex);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
+struct oom_wait_info {
+       struct mem_cgroup *mem;
+       wait_queue_t    wait;
+};
+
+static int memcg_oom_wake_function(wait_queue_t *wait,
+       unsigned mode, int sync, void *arg)
+{
+       struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg;
+       struct oom_wait_info *oom_wait_info;
+
+       oom_wait_info = container_of(wait, struct oom_wait_info, wait);
+
+       if (oom_wait_info->mem == wake_mem)
+               goto wakeup;
+       /* if no hierarchy, no match */
+       if (!oom_wait_info->mem->use_hierarchy || !wake_mem->use_hierarchy)
+               return 0;
+       /*
+        * Both oom_wait_info->mem and wake_mem are stable under us, so
+        * we can use css_is_ancestor() without worrying about RCU.
+        */
+       if (!css_is_ancestor(&oom_wait_info->mem->css, &wake_mem->css) &&
+           !css_is_ancestor(&wake_mem->css, &oom_wait_info->mem->css))
+               return 0;
+
+wakeup:
+       return autoremove_wake_function(wait, mode, sync, arg);
+}
+
+static void memcg_wakeup_oom(struct mem_cgroup *mem)
+{
+       /* for filtering, pass "mem" as argument. */
+       __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem);
+}
+
+static void memcg_oom_recover(struct mem_cgroup *mem)
+{
+       if (mem->oom_kill_disable && atomic_read(&mem->oom_lock))
+               memcg_wakeup_oom(mem);
+}
+
 /*
  * try to call OOM killer. returns false if we should exit memory-reclaim loop.
  */
 bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
 {
-       DEFINE_WAIT(wait);
-       bool locked;
+       struct oom_wait_info owait;
+       bool locked, need_to_kill;
 
+       owait.mem = mem;
+       owait.wait.flags = 0;
+       owait.wait.func = memcg_oom_wake_function;
+       owait.wait.private = current;
+       INIT_LIST_HEAD(&owait.wait.task_list);
+       need_to_kill = true;
        /* At first, try to OOM lock hierarchy under mem.*/
        mutex_lock(&memcg_oom_mutex);
        locked = mem_cgroup_oom_lock(mem);
@@ -1309,32 +1396,23 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
         * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
         * under OOM is always welcomed, use TASK_KILLABLE here.
         */
-       if (!locked)
-               prepare_to_wait(&memcg_oom_waitq, &wait, TASK_KILLABLE);
+       prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
+       if (!locked || mem->oom_kill_disable)
+               need_to_kill = false;
+       if (locked)
+               mem_cgroup_oom_notify(mem);
        mutex_unlock(&memcg_oom_mutex);
 
-       if (locked)
+       if (need_to_kill) {
+               finish_wait(&memcg_oom_waitq, &owait.wait);
                mem_cgroup_out_of_memory(mem, mask);
-       else {
+       } else {
                schedule();
-               finish_wait(&memcg_oom_waitq, &wait);
+               finish_wait(&memcg_oom_waitq, &owait.wait);
        }
        mutex_lock(&memcg_oom_mutex);
        mem_cgroup_oom_unlock(mem);
-       /*
-        * Here, we use global waitq .....more fine grained waitq ?
-        * Assume following hierarchy.
-        * A/
-        *   01
-        *   02
-        * assume OOM happens both in A and 01 at the same time. Tthey are
-        * mutually exclusive by lock. (kill in 01 helps A.)
-        * When we use per memcg waitq, we have to wake up waiters on A and 02
-        * in addtion to waiters on 01. We use global waitq for avoiding mess.
-        * It will not be a big problem.
-        * (And a task may be moved to other groups while it's waiting for OOM.)
-        */
-       wake_up_all(&memcg_oom_waitq);
+       memcg_wakeup_oom(mem);
        mutex_unlock(&memcg_oom_mutex);
 
        if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
@@ -2118,15 +2196,6 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
        /* If swapout, usage of swap doesn't decrease */
        if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
                uncharge_memsw = false;
-       /*
-        * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-        * In those cases, all pages freed continously can be expected to be in
-        * the same cgroup and we have chance to coalesce uncharges.
-        * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
-        * because we want to do uncharge as soon as possible.
-        */
-       if (!current->memcg_batch.do_batch || test_thread_flag(TIF_MEMDIE))
-               goto direct_uncharge;
 
        batch = &current->memcg_batch;
        /*
@@ -2137,6 +2206,17 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype)
        if (!batch->memcg)
                batch->memcg = mem;
        /*
+        * do_batch > 0 when unmapping pages or inode invalidate/truncate.
+        * In those cases, all pages freed continuously can be expected to be
+        * in the same cgroup, and we have a chance to coalesce uncharges.
+        * But we uncharge one by one if this task is killed by OOM (TIF_MEMDIE)
+        * because we want to uncharge as soon as possible.
+        */
+
+       if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
+               goto direct_uncharge;
+
+       /*
         * In typical case, batch->memcg == mem. This means we can
         * merge a series of uncharges to an uncharge of res_counter.
         * If not, we uncharge res_counter ony by one.
@@ -2152,6 +2232,8 @@ direct_uncharge:
        res_counter_uncharge(&mem->res, PAGE_SIZE);
        if (uncharge_memsw)
                res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       if (unlikely(batch->memcg != mem))
+               memcg_oom_recover(mem);
        return;
 }
 
@@ -2188,7 +2270,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        switch (ctype) {
        case MEM_CGROUP_CHARGE_TYPE_MAPPED:
        case MEM_CGROUP_CHARGE_TYPE_DROP:
-               if (page_mapped(page))
+               /* See mem_cgroup_prepare_migration() */
+               if (page_mapped(page) || PageCgroupMigration(pc))
                        goto unlock_out;
                break;
        case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -2288,6 +2371,7 @@ void mem_cgroup_uncharge_end(void)
                res_counter_uncharge(&batch->memcg->res, batch->bytes);
        if (batch->memsw_bytes)
                res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes);
+       memcg_oom_recover(batch->memcg);
        /* forget this pointer (for sanity check) */
        batch->memcg = NULL;
 }
@@ -2410,10 +2494,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
+int mem_cgroup_prepare_migration(struct page *page,
+       struct page *newpage, struct mem_cgroup **ptr)
 {
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
+       enum charge_type ctype;
        int ret = 0;
 
        if (mem_cgroup_disabled())
@@ -2424,69 +2510,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
        if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                css_get(&mem->css);
+               /*
+                * While migrating an anonymous page, its mapcount goes down
+                * to 0 and uncharge() will be called. But even if it's fully
+                * unmapped, migration may fail and the page then has to be
+                * charged again. We set the MIGRATION flag here and delay
+                * uncharge until end_migration() is called.
+                *
+                * Corner Case Thinking
+                * A)
+                * When the old page was mapped as Anon and it's unmapped and
+                * freed while migration was ongoing.
+                * If unmap finds the old page, its uncharge() will be delayed
+                * until end_migration(). If unmap finds the new page, it's
+                * uncharged when its mapcount goes from 1 to 0. If the unmap
+                * code finds a swap migration entry, the new page will not be
+                * mapped and end_migration() will find it (mapcount == 0).
+                *
+                * B)
+                * When the old page was mapped but migration fails, the kernel
+                * remaps it. A charge for it is kept by the MIGRATION flag even
+                * if its mapcount goes down to 0. We can remap it successfully
+                * without charging it again.
+                *
+                * C)
+                * The "old" page is under lock_page() until the end of
+                * migration, so the old page itself will not be swapped out.
+                * If the new page is swapped out before end_migration(), our
+                * hook into the usual swap-out path will catch the event.
+                */
+               if (PageAnon(page))
+                       SetPageCgroupMigration(pc);
        }
        unlock_page_cgroup(pc);
+       /*
+        * If the page is not charged at this point,
+        * we return here.
+        */
+       if (!mem)
+               return 0;
 
        *ptr = mem;
-       if (mem) {
-               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
-               css_put(&mem->css);
+       ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
+       css_put(&mem->css);     /* drop the extra refcount */
+       if (ret || *ptr == NULL) {
+               if (PageAnon(page)) {
+                       lock_page_cgroup(pc);
+                       ClearPageCgroupMigration(pc);
+                       unlock_page_cgroup(pc);
+                       /*
+                        * The old page may be fully unmapped while we kept it.
+                        */
+                       mem_cgroup_uncharge_page(page);
+               }
+               return -ENOMEM;
        }
+       /*
+        * We charge new page before it's used/mapped. So, even if unlock_page()
+        * is called before end_migration, we can catch all events on this new
+        * page. In the case new page is migrated but not remapped, new page's
+        * mapcount will be finally 0 and we call uncharge in end_migration().
+        */
+       pc = lookup_page_cgroup(newpage);
+       if (PageAnon(page))
+               ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+       else if (page_is_file_cache(page))
+               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       else
+               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+       __mem_cgroup_commit_charge(mem, pc, ctype);
        return ret;
 }
 
 /* remove redundant charge if migration failed*/
 void mem_cgroup_end_migration(struct mem_cgroup *mem,
-               struct page *oldpage, struct page *newpage)
+       struct page *oldpage, struct page *newpage)
 {
-       struct page *target, *unused;
+       struct page *used, *unused;
        struct page_cgroup *pc;
-       enum charge_type ctype;
 
        if (!mem)
                return;
+       /* blocks rmdir() */
        cgroup_exclude_rmdir(&mem->css);
        /* at migration success, oldpage->mapping is NULL. */
        if (oldpage->mapping) {
-               target = oldpage;
-               unused = NULL;
+               used = oldpage;
+               unused = newpage;
        } else {
-               target = newpage;
+               used = newpage;
                unused = oldpage;
        }
-
-       if (PageAnon(target))
-               ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
-       else if (page_is_file_cache(target))
-               ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
-       else
-               ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-
-       /* unused page is not on radix-tree now. */
-       if (unused)
-               __mem_cgroup_uncharge_common(unused, ctype);
-
-       pc = lookup_page_cgroup(target);
        /*
-        * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup.
-        * So, double-counting is effectively avoided.
+        * We disallowed uncharge of pages under migration because the
+        * mapcount of the page goes down to zero, temporarily.
+        * Clear the flag and check whether the page should still be charged.
         */
-       __mem_cgroup_commit_charge(mem, pc, ctype);
+       pc = lookup_page_cgroup(oldpage);
+       lock_page_cgroup(pc);
+       ClearPageCgroupMigration(pc);
+       unlock_page_cgroup(pc);
+
+       if (unused != oldpage)
+               pc = lookup_page_cgroup(unused);
+       __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
 
+       pc = lookup_page_cgroup(used);
        /*
-        * Both of oldpage and newpage are still under lock_page().
-        * Then, we don't have to care about race in radix-tree.
-        * But we have to be careful that this page is unmapped or not.
-        *
-        * There is a case for !page_mapped(). At the start of
-        * migration, oldpage was mapped. But now, it's zapped.
-        * But we know *target* page is not freed/reused under us.
-        * mem_cgroup_uncharge_page() does all necessary checks.
+        * If a page is file cache, the radix-tree replacement is atomic
+        * and we can skip this check. When it was an Anon page, its mapcount
+        * goes down to 0. But because we added the MIGRATION flag, it's not
+        * uncharged yet. There are several cases, but the page->mapcount check
+        * and the USED bit check in mem_cgroup_uncharge_page() do enough
+        * checking. (See mem_cgroup_prepare_migration() as well.)
         */
-       if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-               mem_cgroup_uncharge_page(target);
+       if (PageAnon(used))
+               mem_cgroup_uncharge_page(used);
        /*
-        * At migration, we may charge account against cgroup which has no tasks
+        * At migration, we may charge account against cgroup which has no
+        * tasks.
         * So, rmdir()->pre_destroy() can be called while we do this charge.
         * In that case, we need to call pre_destroy() again. check it here.
         */
@@ -2524,10 +2666,11 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                                unsigned long long val)
 {
        int retry_count;
-       u64 memswlimit;
+       u64 memswlimit, memlimit;
        int ret = 0;
        int children = mem_cgroup_count_children(memcg);
        u64 curusage, oldusage;
+       int enlarge;
 
        /*
         * For keeping hierarchical_reclaim simple, how long we should retry
@@ -2538,6 +2681,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 
        oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 
+       enlarge = 0;
        while (retry_count) {
                if (signal_pending(current)) {
                        ret = -EINTR;
@@ -2555,6 +2699,11 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                        mutex_unlock(&set_limit_mutex);
                        break;
                }
+
+               memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
+               if (memlimit < val)
+                       enlarge = 1;
+
                ret = res_counter_set_limit(&memcg->res, val);
                if (!ret) {
                        if (memswlimit == val)
@@ -2576,6 +2725,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                else
                        oldusage = curusage;
        }
+       if (!ret && enlarge)
+               memcg_oom_recover(memcg);
 
        return ret;
 }
@@ -2584,9 +2735,10 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                                        unsigned long long val)
 {
        int retry_count;
-       u64 memlimit, oldusage, curusage;
+       u64 memlimit, memswlimit, oldusage, curusage;
        int children = mem_cgroup_count_children(memcg);
        int ret = -EBUSY;
+       int enlarge = 0;
 
        /* see mem_cgroup_resize_res_limit */
        retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
@@ -2608,6 +2760,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                        mutex_unlock(&set_limit_mutex);
                        break;
                }
+               memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+               if (memswlimit < val)
+                       enlarge = 1;
                ret = res_counter_set_limit(&memcg->memsw, val);
                if (!ret) {
                        if (memlimit == val)
@@ -2630,6 +2785,8 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                else
                        oldusage = curusage;
        }
+       if (!ret && enlarge)
+               memcg_oom_recover(memcg);
        return ret;
 }
 
@@ -2821,6 +2978,7 @@ move_account:
                        if (ret)
                                break;
                }
+               memcg_oom_recover(mem);
                /* it seems parent cgroup doesn't have enough mem */
                if (ret == -ENOMEM)
                        goto try_to_free;
@@ -3311,9 +3469,9 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 
        rcu_read_lock();
        if (!swap)
-               t = rcu_dereference(memcg->thresholds);
+               t = rcu_dereference(memcg->thresholds.primary);
        else
-               t = rcu_dereference(memcg->memsw_thresholds);
+               t = rcu_dereference(memcg->memsw_thresholds.primary);
 
        if (!t)
                goto unlock;
@@ -3325,7 +3483,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
         * If it's not true, a threshold was crossed after last
         * call of __mem_cgroup_threshold().
         */
-       i = atomic_read(&t->current_threshold);
+       i = t->current_threshold;
 
        /*
         * Iterate backward over array of thresholds starting from
@@ -3349,7 +3507,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
                eventfd_signal(t->entries[i].eventfd, 1);
 
        /* Update current_threshold */
-       atomic_set(&t->current_threshold, i - 1);
+       t->current_threshold = i - 1;
 unlock:
        rcu_read_unlock();
 }
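
Editor's note: current_threshold can drop its atomic_t because it is only a cached starting index into an array that is immutable once published. Writers build a replacement array under thresholds_lock and rcu_assign_pointer() it in (see the register/unregister rework below), and the plain store above is no less safe than the atomic_set() it replaces.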
@@ -3369,106 +3527,117 @@ static int compare_thresholds(const void *a, const void *b)
        return _a->threshold - _b->threshold;
 }
 
-static int mem_cgroup_register_event(struct cgroup *cgrp, struct cftype *cft,
-               struct eventfd_ctx *eventfd, const char *args)
+static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data)
+{
+       struct mem_cgroup_eventfd_list *ev;
+
+       list_for_each_entry(ev, &mem->oom_notify, list)
+               eventfd_signal(ev->eventfd, 1);
+       return 0;
+}
+
+static void mem_cgroup_oom_notify(struct mem_cgroup *mem)
+{
+       mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb);
+}
+
+static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
+       struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
-       struct mem_cgroup_threshold_ary *thresholds, *thresholds_new;
+       struct mem_cgroup_thresholds *thresholds;
+       struct mem_cgroup_threshold_ary *new;
        int type = MEMFILE_TYPE(cft->private);
        u64 threshold, usage;
-       int size;
-       int i, ret;
+       int i, size, ret;
 
        ret = res_counter_memparse_write_strategy(args, &threshold);
        if (ret)
                return ret;
 
        mutex_lock(&memcg->thresholds_lock);
+
        if (type == _MEM)
-               thresholds = memcg->thresholds;
+               thresholds = &memcg->thresholds;
        else if (type == _MEMSWAP)
-               thresholds = memcg->memsw_thresholds;
+               thresholds = &memcg->memsw_thresholds;
        else
                BUG();
 
        usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
 
        /* Check if a threshold crossed before adding a new one */
-       if (thresholds)
+       if (thresholds->primary)
                __mem_cgroup_threshold(memcg, type == _MEMSWAP);
 
-       if (thresholds)
-               size = thresholds->size + 1;
-       else
-               size = 1;
+       size = thresholds->primary ? thresholds->primary->size + 1 : 1;
 
        /* Allocate memory for new array of thresholds */
-       thresholds_new = kmalloc(sizeof(*thresholds_new) +
-                       size * sizeof(struct mem_cgroup_threshold),
+       new = kmalloc(sizeof(*new) + size * sizeof(struct mem_cgroup_threshold),
                        GFP_KERNEL);
-       if (!thresholds_new) {
+       if (!new) {
                ret = -ENOMEM;
                goto unlock;
        }
-       thresholds_new->size = size;
+       new->size = size;
 
        /* Copy thresholds (if any) to new array */
-       if (thresholds)
-               memcpy(thresholds_new->entries, thresholds->entries,
-                               thresholds->size *
+       if (thresholds->primary) {
+               memcpy(new->entries, thresholds->primary->entries, (size - 1) *
                                sizeof(struct mem_cgroup_threshold));
+       }
+
        /* Add new threshold */
-       thresholds_new->entries[size - 1].eventfd = eventfd;
-       thresholds_new->entries[size - 1].threshold = threshold;
+       new->entries[size - 1].eventfd = eventfd;
+       new->entries[size - 1].threshold = threshold;
 
        /* Sort thresholds. Registering of new threshold isn't time-critical */
-       sort(thresholds_new->entries, size,
-                       sizeof(struct mem_cgroup_threshold),
+       sort(new->entries, size, sizeof(struct mem_cgroup_threshold),
                        compare_thresholds, NULL);
 
        /* Find current threshold */
-       atomic_set(&thresholds_new->current_threshold, -1);
+       new->current_threshold = -1;
        for (i = 0; i < size; i++) {
-               if (thresholds_new->entries[i].threshold < usage) {
+               if (new->entries[i].threshold < usage) {
                        /*
-                        * thresholds_new->current_threshold will not be used
-                        * until rcu_assign_pointer(), so it's safe to increment
+                        * new->current_threshold will not be used until
+                        * rcu_assign_pointer(), so it's safe to increment
                         * it here.
                         */
-                       atomic_inc(&thresholds_new->current_threshold);
+                       ++new->current_threshold;
                }
        }
 
-       if (type == _MEM)
-               rcu_assign_pointer(memcg->thresholds, thresholds_new);
-       else
-               rcu_assign_pointer(memcg->memsw_thresholds, thresholds_new);
+       /* Free old spare buffer and save old primary buffer as spare */
+       kfree(thresholds->spare);
+       thresholds->spare = thresholds->primary;
+
+       rcu_assign_pointer(thresholds->primary, new);
 
-       /* To be sure that nobody uses thresholds before freeing it */
+       /* To be sure that nobody uses thresholds */
        synchronize_rcu();
 
-       kfree(thresholds);
 unlock:
        mutex_unlock(&memcg->thresholds_lock);
 
        return ret;
 }
 
-static int mem_cgroup_unregister_event(struct cgroup *cgrp, struct cftype *cft,
-               struct eventfd_ctx *eventfd)
+static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
+       struct cftype *cft, struct eventfd_ctx *eventfd)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
-       struct mem_cgroup_threshold_ary *thresholds, *thresholds_new;
+       struct mem_cgroup_thresholds *thresholds;
+       struct mem_cgroup_threshold_ary *new;
        int type = MEMFILE_TYPE(cft->private);
        u64 usage;
-       int size = 0;
-       int i, j, ret;
+       int i, j, size;
 
        mutex_lock(&memcg->thresholds_lock);
        if (type == _MEM)
-               thresholds = memcg->thresholds;
+               thresholds = &memcg->thresholds;
        else if (type == _MEMSWAP)
-               thresholds = memcg->memsw_thresholds;
+               thresholds = &memcg->memsw_thresholds;
        else
                BUG();
 
@@ -3484,59 +3653,136 @@ static int mem_cgroup_unregister_event(struct cgroup *cgrp, struct cftype *cft,
        __mem_cgroup_threshold(memcg, type == _MEMSWAP);
 
        /* Calculate new number of threshold */
-       for (i = 0; i < thresholds->size; i++) {
-               if (thresholds->entries[i].eventfd != eventfd)
+       size = 0;
+       for (i = 0; i < thresholds->primary->size; i++) {
+               if (thresholds->primary->entries[i].eventfd != eventfd)
                        size++;
        }
 
+       new = thresholds->spare;
+
        /* Set thresholds array to NULL if we don't have thresholds */
        if (!size) {
-               thresholds_new = NULL;
-               goto assign;
+               kfree(new);
+               new = NULL;
+               goto swap_buffers;
        }
 
-       /* Allocate memory for new array of thresholds */
-       thresholds_new = kmalloc(sizeof(*thresholds_new) +
-                       size * sizeof(struct mem_cgroup_threshold),
-                       GFP_KERNEL);
-       if (!thresholds_new) {
-               ret = -ENOMEM;
-               goto unlock;
-       }
-       thresholds_new->size = size;
+       new->size = size;
 
        /* Copy thresholds and find current threshold */
-       atomic_set(&thresholds_new->current_threshold, -1);
-       for (i = 0, j = 0; i < thresholds->size; i++) {
-               if (thresholds->entries[i].eventfd == eventfd)
+       new->current_threshold = -1;
+       for (i = 0, j = 0; i < thresholds->primary->size; i++) {
+               if (thresholds->primary->entries[i].eventfd == eventfd)
                        continue;
 
-               thresholds_new->entries[j] = thresholds->entries[i];
-               if (thresholds_new->entries[j].threshold < usage) {
+               new->entries[j] = thresholds->primary->entries[i];
+               if (new->entries[j].threshold < usage) {
                        /*
-                        * thresholds_new->current_threshold will not be used
+                        * new->current_threshold will not be used
                         * until rcu_assign_pointer(), so it's safe to increment
                         * it here.
                         */
-                       atomic_inc(&thresholds_new->current_threshold);
+                       ++new->current_threshold;
                }
                j++;
        }
 
-assign:
-       if (type == _MEM)
-               rcu_assign_pointer(memcg->thresholds, thresholds_new);
-       else
-               rcu_assign_pointer(memcg->memsw_thresholds, thresholds_new);
+swap_buffers:
+       /* Swap primary and spare array */
+       thresholds->spare = thresholds->primary;
+       rcu_assign_pointer(thresholds->primary, new);
 
-       /* To be sure that nobody uses thresholds before freeing it */
+       /* To be sure that nobody uses thresholds */
        synchronize_rcu();
 
-       kfree(thresholds);
-unlock:
        mutex_unlock(&memcg->thresholds_lock);
+}
 
-       return ret;
+static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
+       struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup_eventfd_list *event;
+       int type = MEMFILE_TYPE(cft->private);
+
+       BUG_ON(type != _OOM_TYPE);
+       event = kmalloc(sizeof(*event), GFP_KERNEL);
+       if (!event)
+               return -ENOMEM;
+
+       mutex_lock(&memcg_oom_mutex);
+
+       event->eventfd = eventfd;
+       list_add(&event->list, &memcg->oom_notify);
+
+       /* already in OOM? */
+       if (atomic_read(&memcg->oom_lock))
+               eventfd_signal(eventfd, 1);
+       mutex_unlock(&memcg_oom_mutex);
+
+       return 0;
+}
+
+static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
+       struct cftype *cft, struct eventfd_ctx *eventfd)
+{
+       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup_eventfd_list *ev, *tmp;
+       int type = MEMFILE_TYPE(cft->private);
+
+       BUG_ON(type != _OOM_TYPE);
+
+       mutex_lock(&memcg_oom_mutex);
+
+       list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) {
+               if (ev->eventfd == eventfd) {
+                       list_del(&ev->list);
+                       kfree(ev);
+               }
+       }
+
+       mutex_unlock(&memcg_oom_mutex);
+}
+
+static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
+       struct cftype *cft,  struct cgroup_map_cb *cb)
+{
+       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+
+       cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable);
+
+       if (atomic_read(&mem->oom_lock))
+               cb->fill(cb, "under_oom", 1);
+       else
+               cb->fill(cb, "under_oom", 0);
+       return 0;
+}
+
+/*
+ * Toggle oom_kill_disable; constrained by the use_hierarchy checks below.
+ */
+static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
+       struct cftype *cft, u64 val)
+{
+       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+       struct mem_cgroup *parent;
+
+       /* cannot be set on the root cgroup; only 0 and 1 are allowed */
+       if (!cgrp->parent || !((val == 0) || (val == 1)))
+               return -EINVAL;
+
+       parent = mem_cgroup_from_cont(cgrp->parent);
+
+       cgroup_lock();
+       /* oom_kill_disable applies to the whole use_hierarchy subtree. */
+       if ((parent->use_hierarchy) ||
+           (mem->use_hierarchy && !list_empty(&cgrp->children))) {
+               cgroup_unlock();
+               return -EINVAL;
+       }
+       mem->oom_kill_disable = val;
+       cgroup_unlock();
+       return 0;
 }
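
Given the checks above, toggling the knob from userspace is a single write of "0" or "1" to memory.oom_control; the write fails with -EINVAL on the root group or while the hierarchy is in use. A small sketch (the path argument is hypothetical):

#include <fcntl.h>
#include <unistd.h>

/* Sketch: write "0"/"1" to a group's memory.oom_control file. */
static int set_oom_kill_disable(const char *path, int disable)
{
	char c = disable ? '1' : '0';
	int ret = -1;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, &c, 1) == 1)	/* EINVAL on root or active hierarchy */
		ret = 0;
	close(fd);
	return ret;
}
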
 
 static struct cftype mem_cgroup_files[] = {
@@ -3544,8 +3790,8 @@ static struct cftype mem_cgroup_files[] = {
                .name = "usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
                .read_u64 = mem_cgroup_read,
-               .register_event = mem_cgroup_register_event,
-               .unregister_event = mem_cgroup_unregister_event,
+               .register_event = mem_cgroup_usage_register_event,
+               .unregister_event = mem_cgroup_usage_unregister_event,
        },
        {
                .name = "max_usage_in_bytes",
@@ -3594,6 +3840,14 @@ static struct cftype mem_cgroup_files[] = {
                .read_u64 = mem_cgroup_move_charge_read,
                .write_u64 = mem_cgroup_move_charge_write,
        },
+       {
+               .name = "oom_control",
+               .read_map = mem_cgroup_oom_control_read,
+               .write_u64 = mem_cgroup_oom_control_write,
+               .register_event = mem_cgroup_oom_register_event,
+               .unregister_event = mem_cgroup_oom_unregister_event,
+               .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
+       },
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -3602,8 +3856,8 @@ static struct cftype memsw_cgroup_files[] = {
                .name = "memsw.usage_in_bytes",
                .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
                .read_u64 = mem_cgroup_read,
-               .register_event = mem_cgroup_register_event,
-               .unregister_event = mem_cgroup_unregister_event,
+               .register_event = mem_cgroup_usage_register_event,
+               .unregister_event = mem_cgroup_usage_unregister_event,
        },
        {
                .name = "memsw.max_usage_in_bytes",
@@ -3831,6 +4085,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        } else {
                parent = mem_cgroup_from_cont(cont->parent);
                mem->use_hierarchy = parent->use_hierarchy;
+               mem->oom_kill_disable = parent->oom_kill_disable;
        }
 
        if (parent && parent->use_hierarchy) {
@@ -3849,6 +4104,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        }
        mem->last_scanned_child = 0;
        spin_lock_init(&mem->reclaim_param_lock);
+       INIT_LIST_HEAD(&mem->oom_notify);
 
        if (parent)
                mem->swappiness = get_swappiness(parent);
@@ -3976,6 +4232,80 @@ enum mc_target_type {
        MC_TARGET_SWAP,
 };
 
+static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
+                                               unsigned long addr, pte_t ptent)
+{
+       struct page *page = vm_normal_page(vma, addr, ptent);
+
+       if (!page || !page_mapped(page))
+               return NULL;
+       if (PageAnon(page)) {
+               /* we don't move shared anon */
+               if (!move_anon() || page_mapcount(page) > 2)
+                       return NULL;
+       } else if (!move_file())
+               /* we ignore mapcount for file pages */
+               return NULL;
+       if (!get_page_unless_zero(page))
+               return NULL;
+
+       return page;
+}
+
+static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
+                       unsigned long addr, pte_t ptent, swp_entry_t *entry)
+{
+       int usage_count;
+       struct page *page = NULL;
+       swp_entry_t ent = pte_to_swp_entry(ptent);
+
+       if (!move_anon() || non_swap_entry(ent))
+               return NULL;
+       usage_count = mem_cgroup_count_swap_user(ent, &page);
+       if (usage_count > 1) { /* we don't move shared anon */
+               if (page)
+                       put_page(page);
+               return NULL;
+       }
+       if (do_swap_account)
+               entry->val = ent.val;
+
+       return page;
+}
+
+static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
+                       unsigned long addr, pte_t ptent, swp_entry_t *entry)
+{
+       struct page *page = NULL;
+       struct inode *inode;
+       struct address_space *mapping;
+       pgoff_t pgoff;
+
+       if (!vma->vm_file) /* anonymous vma */
+               return NULL;
+       if (!move_file())
+               return NULL;
+
+       inode = vma->vm_file->f_path.dentry->d_inode;
+       mapping = vma->vm_file->f_mapping;
+       if (pte_none(ptent))
+               pgoff = linear_page_index(vma, addr);
+       else /* pte_file(ptent) is true */
+               pgoff = pte_to_pgoff(ptent);
+
+       /* page is moved even if it's not in this task's RSS (not faulted yet) */
+       if (!mapping_cap_swap_backed(mapping)) { /* normal file */
+               page = find_get_page(mapping, pgoff);
+       } else { /* shmem/tmpfs file; we should account for swap too. */
+               swp_entry_t ent;
+               mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
+               if (do_swap_account)
+                       entry->val = ent.val;
+       }
+
+       return page;
+}
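
For the pte_none() case above, linear_page_index() derives the file offset purely from the mapping geometry; for a file pte the offset was encoded in the pte itself. The linear case is simple arithmetic; a sketch of the equivalent computation for ordinary (non-hugetlb) mappings, with explicit parameters standing in for the vma fields:

/* Sketch of linear_page_index() for ordinary (non-hugetlb) mappings:
 * the file page offset backing 'addr' in a vma starting at 'vm_start'
 * whose first page maps file offset 'vm_pgoff'.
 */
static unsigned long linear_index(unsigned long addr, unsigned long vm_start,
				  unsigned long vm_pgoff, unsigned int page_shift)
{
	return ((addr - vm_start) >> page_shift) + vm_pgoff;
}
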
+
 static int is_target_pte_for_mc(struct vm_area_struct *vma,
                unsigned long addr, pte_t ptent, union mc_target *target)
 {
@@ -3983,43 +4313,16 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
        struct page_cgroup *pc;
        int ret = 0;
        swp_entry_t ent = { .val = 0 };
-       int usage_count = 0;
-       bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
-                                       &mc.to->move_charge_at_immigrate);
 
-       if (!pte_present(ptent)) {
-               /* TODO: handle swap of shmes/tmpfs */
-               if (pte_none(ptent) || pte_file(ptent))
-                       return 0;
-               else if (is_swap_pte(ptent)) {
-                       ent = pte_to_swp_entry(ptent);
-                       if (!move_anon || non_swap_entry(ent))
-                               return 0;
-                       usage_count = mem_cgroup_count_swap_user(ent, &page);
-               }
-       } else {
-               page = vm_normal_page(vma, addr, ptent);
-               if (!page || !page_mapped(page))
-                       return 0;
-               /*
-                * TODO: We don't move charges of file(including shmem/tmpfs)
-                * pages for now.
-                */
-               if (!move_anon || !PageAnon(page))
-                       return 0;
-               if (!get_page_unless_zero(page))
-                       return 0;
-               usage_count = page_mapcount(page);
-       }
-       if (usage_count > 1) {
-               /*
-                * TODO: We don't move charges of shared(used by multiple
-                * processes) pages for now.
-                */
-               if (page)
-                       put_page(page);
+       if (pte_present(ptent))
+               page = mc_handle_present_pte(vma, addr, ptent);
+       else if (is_swap_pte(ptent))
+               page = mc_handle_swap_pte(vma, addr, ptent, &ent);
+       else if (pte_none(ptent) || pte_file(ptent))
+               page = mc_handle_file_pte(vma, addr, ptent, &ent);
+
+       if (!page && !ent.val)
                return 0;
-       }
        if (page) {
                pc = lookup_page_cgroup(page);
                /*
@@ -4035,8 +4338,8 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
                if (!ret || !target)
                        put_page(page);
        }
-       /* throught */
-       if (ent.val && do_swap_account && !ret &&
+       /* There is a swap entry, and the page doesn't exist or isn't charged */
+       if (ent.val && !ret &&
                        css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
                ret = MC_TARGET_SWAP;
                if (target)
@@ -4077,9 +4380,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
                };
                if (is_vm_hugetlb_page(vma))
                        continue;
-               /* TODO: We don't move charges of shmem/tmpfs pages for now. */
-               if (vma->vm_flags & VM_SHARED)
-                       continue;
                walk_page_range(vma->vm_start, vma->vm_end,
                                        &mem_cgroup_count_precharge_walk);
        }
@@ -4102,6 +4402,7 @@ static void mem_cgroup_clear_mc(void)
        if (mc.precharge) {
                __mem_cgroup_cancel_charge(mc.to, mc.precharge);
                mc.precharge = 0;
+               memcg_oom_recover(mc.to);
        }
        /*
         * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
@@ -4110,6 +4411,7 @@ static void mem_cgroup_clear_mc(void)
        if (mc.moved_charge) {
                __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
                mc.moved_charge = 0;
+               memcg_oom_recover(mc.from);
        }
        /* we must fixup refcnts and charges */
        if (mc.moved_swap) {
@@ -4274,9 +4576,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
                };
                if (is_vm_hugetlb_page(vma))
                        continue;
-               /* TODO: We don't move charges of shmem/tmpfs pages for now. */
-               if (vma->vm_flags & VM_SHARED)
-                       continue;
                ret = walk_page_range(vma->vm_start, vma->vm_end,
                                                &mem_cgroup_move_charge_walk);
                if (ret)
index 09e2471..4205b1d 100644 (file)
@@ -590,7 +590,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        }
 
        /* charge against new page */
-       charge = mem_cgroup_prepare_migration(page, &mem);
+       charge = mem_cgroup_prepare_migration(page, newpage, &mem);
        if (charge == -ENOMEM) {
                rc = -ENOMEM;
                goto unlock;
index b68e802..709aedf 100644 (file)
@@ -479,12 +479,9 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
        read_lock(&tasklist_lock);
 retry:
        p = select_bad_process(&points, mem);
-       if (PTR_ERR(p) == -1UL)
+       if (!p || PTR_ERR(p) == -1UL)
                goto out;
 
-       if (!p)
-               p = current;
-
        if (oom_kill_process(p, gfp_mask, 0, points, mem,
                                "Memory cgroup out of memory"))
                goto retry;
index 08b3499..431214b 100644 (file)
 #include <asm/div64.h>
 #include "internal.h"
 
+#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
+DEFINE_PER_CPU(int, numa_node);
+EXPORT_PER_CPU_SYMBOL(numa_node);
+#endif
+
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+/*
+ * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
+ * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
+ * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem()
+ * defined in <linux/topology.h>.
+ */
+DEFINE_PER_CPU(int, _numa_mem_);               /* Kernel "local memory" node */
+EXPORT_PER_CPU_SYMBOL(_numa_mem_);
+#endif
+
 /*
  * Array of node states.
  */
@@ -2856,6 +2872,24 @@ static void build_zonelist_cache(pg_data_t *pgdat)
                zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
 }
 
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+/*
+ * Return node id of node used for "local" allocations.
+ * I.e., first node id of first zone in arg node's generic zonelist.
+ * Used for initializing percpu 'numa_mem', which is used primarily
+ * for kernel allocations, so use GFP_KERNEL flags to locate zonelist.
+ */
+int local_memory_node(int node)
+{
+       struct zone *zone;
+
+       (void)first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
+                                  gfp_zone(GFP_KERNEL),
+                                  NULL,
+                                  &zone);
+       return zone->node;
+}
+#endif
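
With numa_mem populated this way, callers on memoryless nodes can allocate from the nearest node that actually has memory through the cpu_to_mem()/numa_mem_id() accessors, which is exactly the substitution the slab hunks below make. A hedged kernel-style sketch (alloc_near_cpu is a hypothetical helper):

#include <linux/slab.h>
#include <linux/topology.h>

/* Allocate near the calling CPU; numa_mem_id() falls back to
 * numa_node_id() when CONFIG_HAVE_MEMORYLESS_NODES is not set.
 */
static void *alloc_near_cpu(size_t size)
{
	return kmalloc_node(size, GFP_KERNEL, numa_mem_id());
}
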
 
 #else  /* CONFIG_NUMA */
 
@@ -2970,9 +3004,23 @@ static __init_refok int __build_all_zonelists(void *data)
         * needs the percpu allocator in order to allocate its pagesets
         * (a chicken-egg dilemma).
         */
-       for_each_possible_cpu(cpu)
+       for_each_possible_cpu(cpu) {
                setup_pageset(&per_cpu(boot_pageset, cpu), 0);
 
+#ifdef CONFIG_HAVE_MEMORYLESS_NODES
+               /*
+                * We now know the "local memory node" for each node--
+                * i.e., the node of the first zone in the generic zonelist.
+                * Set up numa_mem percpu variable for on-line cpus.  During
+                * boot, only the boot cpu should be on-line;  we'll init the
+                * secondary cpus' numa_mem as they come on-line.  During
+                * node/memory hotplug, we'll fixup all on-line cpus.
+                */
+               if (cpu_online(cpu))
+                       set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu)));
+#endif
+       }
+
        return 0;
 }
 
index 4ef9797..855eaf5 100644 (file)
@@ -2559,6 +2559,45 @@ out4:
        return error;
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/**
+ * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
+ * @inode: the inode to be searched
+ * @pgoff: the offset to be searched
+ * @pagep: the pointer for the found page to be stored
+ * @ent: the pointer for the found swap entry to be stored
+ *
+ * If a page is found, its refcount is incremented. The caller is
+ * responsible for dropping that reference.
+ */
+void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+                                       struct page **pagep, swp_entry_t *ent)
+{
+       swp_entry_t entry = { .val = 0 }, *ptr;
+       struct page *page = NULL;
+       struct shmem_inode_info *info = SHMEM_I(inode);
+
+       if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+               goto out;
+
+       spin_lock(&info->lock);
+       ptr = shmem_swp_entry(info, pgoff, NULL);
+#ifdef CONFIG_SWAP
+       if (ptr && ptr->val) {
+               entry.val = ptr->val;
+               page = find_get_page(&swapper_space, entry.val);
+       } else
+#endif
+               page = find_get_page(inode->i_mapping, pgoff);
+       if (ptr)
+               shmem_swp_unmap(ptr);
+       spin_unlock(&info->lock);
+out:
+       *pagep = page;
+       *ent = entry;
+}
+#endif
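
A caller receives at most one of the two results: a page with an elevated refcount, or a swap entry in *ent. A sketch of the expected calling convention (inspect_shmem_slot is a hypothetical caller):

/* Sketch of the calling convention; the helper name is hypothetical. */
static void inspect_shmem_slot(struct inode *inode, pgoff_t pgoff)
{
	struct page *page;
	swp_entry_t ent;

	mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
	if (page) {
		/* ... examine the page-cache page ... */
		put_page(page);		/* drop the reference taken for us */
	} else if (ent.val) {
		/* ... the slot is swapped out; 'ent' names the swap slot ... */
	}
}
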
+
 #else /* !CONFIG_SHMEM */
 
 /*
@@ -2598,6 +2637,31 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
        return 0;
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/**
+ * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
+ * @inode: the inode to be searched
+ * @pgoff: the offset to be searched
+ * @pagep: the pointer for the found page to be stored
+ * @ent: the pointer for the found swap entry to be stored
+ *
+ * If a page is found, its refcount is incremented. The caller is
+ * responsible for dropping that reference.
+ */
+void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+                                       struct page **pagep, swp_entry_t *ent)
+{
+       struct page *page = NULL;
+
+       if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+               goto out;
+       page = find_get_page(inode->i_mapping, pgoff);
+out:
+       *pagep = page;
+       *ent = (swp_entry_t){ .val = 0 };
+}
+#endif
+
 #define shmem_vm_ops                           generic_file_vm_ops
 #define shmem_file_operations                  ramfs_file_operations
 #define shmem_get_inode(sb, dir, mode, dev, flags)     ramfs_get_inode(sb, dir, mode, dev)
index 02786e1..e49f8f4 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -821,7 +821,7 @@ static void init_reap_node(int cpu)
 {
        int node;
 
-       node = next_node(cpu_to_node(cpu), node_online_map);
+       node = next_node(cpu_to_mem(cpu), node_online_map);
        if (node == MAX_NUMNODES)
                node = first_node(node_online_map);
 
@@ -1050,7 +1050,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
        struct array_cache *alien = NULL;
        int node;
 
-       node = numa_node_id();
+       node = numa_mem_id();
 
        /*
         * Make sure we are not freeing an object from another node to the array
@@ -1129,7 +1129,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 {
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
-       int node = cpu_to_node(cpu);
+       int node = cpu_to_mem(cpu);
        const struct cpumask *mask = cpumask_of_node(node);
 
        list_for_each_entry(cachep, &cache_chain, next) {
@@ -1194,7 +1194,7 @@ static int __cpuinit cpuup_prepare(long cpu)
 {
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
-       int node = cpu_to_node(cpu);
+       int node = cpu_to_mem(cpu);
        int err;
 
        /*
@@ -1321,7 +1321,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                mutex_unlock(&cache_chain_mutex);
                break;
        }
-       return err ? NOTIFY_BAD : NOTIFY_OK;
+       return notifier_from_errno(err);
 }
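
notifier_from_errno() folds a -Exxx code into the notifier return convention (NOTIFY_STOP_MASK plus an encoded errno, or NOTIFY_OK for 0), so chain walkers can recover the exact error with notifier_to_errno() instead of seeing a bare NOTIFY_BAD. A sketch of the round trip:

#include <linux/errno.h>
#include <linux/notifier.h>

/* Sketch: errno -> notifier return value -> errno round trip. */
static int demo_roundtrip(void)
{
	int nret = notifier_from_errno(-ENOMEM);	/* stop + encoded errno */

	return notifier_to_errno(nret);			/* -ENOMEM again */
}
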
 
 static struct notifier_block __cpuinitdata cpucache_notifier = {
@@ -1479,7 +1479,7 @@ void __init kmem_cache_init(void)
         * 6) Resize the head arrays of the kmalloc caches to their final sizes.
         */
 
-       node = numa_node_id();
+       node = numa_mem_id();
 
        /* 1) create the cache_cache */
        INIT_LIST_HEAD(&cache_chain);
@@ -2121,7 +2121,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
                        }
                }
        }
-       cachep->nodelists[numa_node_id()]->next_reap =
+       cachep->nodelists[numa_mem_id()]->next_reap =
                        jiffies + REAPTIMEOUT_LIST3 +
                        ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
@@ -2452,7 +2452,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
+       assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
 #endif
 }
 
@@ -2479,7 +2479,7 @@ static void do_drain(void *arg)
 {
        struct kmem_cache *cachep = arg;
        struct array_cache *ac;
-       int node = numa_node_id();
+       int node = numa_mem_id();
 
        check_irq_off();
        ac = cpu_cache_get(cachep);
@@ -3012,7 +3012,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 
 retry:
        check_irq_off();
-       node = numa_node_id();
+       node = numa_mem_id();
        ac = cpu_cache_get(cachep);
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3216,10 +3216,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 
        if (in_interrupt() || (flags & __GFP_THISNODE))
                return NULL;
-       nid_alloc = nid_here = numa_node_id();
+       nid_alloc = nid_here = numa_mem_id();
        get_mems_allowed();
        if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
-               nid_alloc = cpuset_mem_spread_node();
+               nid_alloc = cpuset_slab_spread_node();
        else if (current->mempolicy)
                nid_alloc = slab_node(current->mempolicy);
        put_mems_allowed();
@@ -3281,7 +3281,7 @@ retry:
                if (local_flags & __GFP_WAIT)
                        local_irq_enable();
                kmem_flagcheck(cache, flags);
-               obj = kmem_getpages(cache, local_flags, numa_node_id());
+               obj = kmem_getpages(cache, local_flags, numa_mem_id());
                if (local_flags & __GFP_WAIT)
                        local_irq_disable();
                if (obj) {
@@ -3389,6 +3389,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 {
        unsigned long save_flags;
        void *ptr;
+       int slab_node = numa_mem_id();
 
        flags &= gfp_allowed_mask;
 
@@ -3401,7 +3402,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        local_irq_save(save_flags);
 
        if (nodeid == -1)
-               nodeid = numa_node_id();
+               nodeid = slab_node;
 
        if (unlikely(!cachep->nodelists[nodeid])) {
                /* Node not bootstrapped yet */
@@ -3409,7 +3410,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
                goto out;
        }
 
-       if (nodeid == numa_node_id()) {
+       if (nodeid == slab_node) {
                /*
                 * Use the locally cached objects if possible.
                 * However ____cache_alloc does not allow fallback
@@ -3453,8 +3454,8 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
         * We may just have run out of memory on the local node.
         * ____cache_alloc_node() knows how to locate memory on other nodes
         */
-       if (!objp)
-               objp = ____cache_alloc_node(cache, flags, numa_node_id());
+       if (!objp)
+               objp = ____cache_alloc_node(cache, flags, numa_mem_id());
 
   out:
        return objp;
@@ -3551,7 +3552,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
 {
        int batchcount;
        struct kmem_list3 *l3;
-       int node = numa_node_id();
+       int node = numa_mem_id();
 
        batchcount = ac->batchcount;
 #if DEBUG
@@ -3985,7 +3986,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                return -ENOMEM;
 
        for_each_online_cpu(i) {
-               new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
+               new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
                                                batchcount, gfp);
                if (!new->new[i]) {
                        for (i--; i >= 0; i--)
@@ -4007,9 +4008,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                struct array_cache *ccold = new->new[i];
                if (!ccold)
                        continue;
-               spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
-               spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
+               spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+               free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
+               spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
                kfree(ccold);
        }
        kfree(new);
@@ -4115,7 +4116,7 @@ static void cache_reap(struct work_struct *w)
 {
        struct kmem_cache *searchp;
        struct kmem_list3 *l3;
-       int node = numa_node_id();
+       int node = numa_mem_id();
        struct delayed_work *work = to_delayed_work(w);
 
        if (!mutex_trylock(&cache_chain_mutex))
index fd8b283..f28ad2c 100644 (file)
@@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
                iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
                                        GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
                if (!iucv_irq_data[cpu])
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
+
                iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
                                     GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
                if (!iucv_param[cpu]) {
                        kfree(iucv_irq_data[cpu]);
                        iucv_irq_data[cpu] = NULL;
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
                }
                iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
                                        GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
@@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
                        iucv_param[cpu] = NULL;
                        kfree(iucv_irq_data[cpu]);
                        iucv_irq_data[cpu] = NULL;
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-ENOMEM);
                }
                break;
        case CPU_UP_CANCELED:
@@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
                cpu_clear(cpu, cpumask);
                if (cpus_empty(cpumask))
                        /* Can't offline the last IUCV-enabled cpu. */
-                       return NOTIFY_BAD;
+                       return notifier_from_errno(-EINVAL);
                smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
                if (cpus_empty(iucv_irq_cpumask))
                        smp_call_function_single(first_cpu(iucv_buffer_cpumask),
index b7cd8cc..2a96751 100644 (file)
@@ -2293,6 +2293,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
        struct sockaddr *addr = args->dstaddr;
        struct rpc_xprt *xprt;
        struct sock_xprt *transport;
+       struct rpc_xprt *ret;
 
        xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
        if (IS_ERR(xprt))
@@ -2330,8 +2331,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
                xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
                break;
        default:
-               kfree(xprt);
-               return ERR_PTR(-EAFNOSUPPORT);
+               ret = ERR_PTR(-EAFNOSUPPORT);
+               goto out_err;
        }
 
        if (xprt_bound(xprt))
@@ -2346,10 +2347,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
        if (try_module_get(THIS_MODULE))
                return xprt;
-
+       ret = ERR_PTR(-EINVAL);
+out_err:
        kfree(xprt->slot);
        kfree(xprt);
-       return ERR_PTR(-EINVAL);
+       return ret;
 }
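
The restructuring above is the usual kernel single-exit idiom: every failure path sets a return value and jumps to one label that unwinds whatever was allocated, so the kfree() calls are written exactly once. A minimal sketch with a hypothetical widget type:

#include <linux/err.h>
#include <linux/slab.h>

struct widget {
	int flags;
};

/* Sketch of the single-exit error-unwinding idiom used above. */
static struct widget *widget_create(int flags)
{
	struct widget *w, *ret;

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w)
		return ERR_PTR(-ENOMEM);

	if (flags < 0) {
		ret = ERR_PTR(-EINVAL);
		goto out_err;			/* one place frees everything */
	}
	w->flags = flags;
	return w;

out_err:
	kfree(w);
	return ret;
}
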
 
 static const struct rpc_timeout xs_tcp_default_timeout = {
@@ -2368,6 +2370,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        struct sockaddr *addr = args->dstaddr;
        struct rpc_xprt *xprt;
        struct sock_xprt *transport;
+       struct rpc_xprt *ret;
 
        xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
        if (IS_ERR(xprt))
@@ -2403,8 +2406,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
                xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
                break;
        default:
-               kfree(xprt);
-               return ERR_PTR(-EAFNOSUPPORT);
+               ret = ERR_PTR(-EAFNOSUPPORT);
+               goto out_err;
        }
 
        if (xprt_bound(xprt))
@@ -2420,10 +2423,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 
        if (try_module_get(THIS_MODULE))
                return xprt;
-
+       ret = ERR_PTR(-EINVAL);
+out_err:
        kfree(xprt->slot);
        kfree(xprt);
-       return ERR_PTR(-EINVAL);
+       return ret;
 }
 
 /**
@@ -2437,6 +2441,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
        struct rpc_xprt *xprt;
        struct sock_xprt *transport;
        struct svc_sock *bc_sock;
+       struct rpc_xprt *ret;
 
        xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
        if (IS_ERR(xprt))
@@ -2476,8 +2481,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
                                   RPCBIND_NETID_TCP6);
                break;
        default:
-               kfree(xprt);
-               return ERR_PTR(-EAFNOSUPPORT);
+               ret = ERR_PTR(-EAFNOSUPPORT);
+               goto out_err;
        }
 
        if (xprt_bound(xprt))
@@ -2499,9 +2504,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 
        if (try_module_get(THIS_MODULE))
                return xprt;
+       ret = ERR_PTR(-EINVAL);
+out_err:
        kfree(xprt->slot);
        kfree(xprt);
-       return ERR_PTR(-EINVAL);
+       return ret;
 }
 
 static struct xprt_class       xs_udp_transport = {
index 76af5f9..a932ae5 100644 (file)
@@ -242,6 +242,7 @@ case "$arg" in
                echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f"
                echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f"
                echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f"
+               echo "$output_file" | grep -q "\.lzo$" && compr="lzop -9 -f"
                echo "$output_file" | grep -q "\.cpio$" && compr="cat"
                shift
                ;;
index 5d4402a..38783dc 100644 (file)
@@ -124,6 +124,7 @@ extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 extern int install_user_keyrings(void);
 extern int install_thread_keyring_to_cred(struct cred *);
 extern int install_process_keyring_to_cred(struct cred *);
+extern int install_session_keyring_to_cred(struct cred *, struct key *);
 
 extern struct key *request_key_and_link(struct key_type *type,
                                        const char *description,
index 8f4dce1..13074b4 100644 (file)
@@ -1269,7 +1269,7 @@ long keyctl_session_to_parent(void)
                goto not_permitted;
 
        /* the parent must be single threaded */
-       if (atomic_read(&parent->signal->count) != 1)
+       if (!thread_group_empty(parent))
                goto not_permitted;
 
        /* the parent and the child must have different session keyrings or
index 20a38fe..6b8e4ff 100644 (file)
@@ -216,8 +216,7 @@ static int install_process_keyring(void)
 /*
  * install a session keyring directly to a credentials struct
  */
-static int install_session_keyring_to_cred(struct cred *cred,
-                                          struct key *keyring)
+int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 {
        unsigned long flags;
        struct key *old;
index f656e9c..f5ec9ac 100644 (file)
@@ -58,6 +58,38 @@ void complete_request_key(struct key_construction *cons, int error)
 }
 EXPORT_SYMBOL(complete_request_key);
 
+static int umh_keys_init(struct subprocess_info *info)
+{
+       struct cred *cred = (struct cred*)current_cred();
+       struct key *keyring = info->data;
+       /*
+        * This is called in the context of the freshly forked kthread, before
+        * kernel_execve(), so we can just change our ->session_keyring.
+        */
+       return install_session_keyring_to_cred(cred, keyring);
+}
+
+static void umh_keys_cleanup(struct subprocess_info *info)
+{
+       struct key *keyring = info->data;
+       key_put(keyring);
+}
+
+static int call_usermodehelper_keys(char *path, char **argv, char **envp,
+                        struct key *session_keyring, enum umh_wait wait)
+{
+       gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+       struct subprocess_info *info =
+               call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+       if (!info)
+               return -ENOMEM;
+
+       call_usermodehelper_setfns(info, umh_keys_init, umh_keys_cleanup,
+                                       key_get(session_keyring));
+       return call_usermodehelper_exec(info, wait);
+}
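
This composes the three-step usermodehelper API: call_usermodehelper_setup() allocates the request, call_usermodehelper_setfns() attaches init/cleanup callbacks plus a data pointer, and call_usermodehelper_exec() runs it. A hedged sketch of the same composition with generic callbacks (demo_init, demo_cleanup and run_helper are hypothetical):

#include <linux/kmod.h>

static int demo_init(struct subprocess_info *info)
{
	/* runs in the freshly forked helper, before the exec */
	return 0;
}

static void demo_cleanup(struct subprocess_info *info)
{
	/* runs when the request is torn down; release info->data here */
}

static int run_helper(char *path, char **argv, char **envp, void *data)
{
	struct subprocess_info *info;

	info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	call_usermodehelper_setfns(info, demo_init, demo_cleanup, data);
	return call_usermodehelper_exec(info, UMH_WAIT_PROC);
}
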
+
 /*
  * request userspace finish the construction of a key
  * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>"
index 1e6a9e4..6b4b6da 100644 (file)
@@ -15,6 +15,9 @@ suffix_$(CONFIG_INITRAMFS_COMPRESSION_BZIP2)  = .bz2
 # Lzma
 suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZMA)   = .lzma
 
+# Lzo
+suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZO)   = .lzo
+
 # Generate builtin.o based on initramfs_data.o
 obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data$(suffix_y).o
 
@@ -45,7 +48,7 @@ endif
 quiet_cmd_initfs = GEN     $@
       cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input)
 
-targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio
+targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio.lzo initramfs_data.cpio
 # do not try to update files included in initramfs
 $(deps_initramfs): ;
 
diff --git a/usr/initramfs_data.lzo.S b/usr/initramfs_data.lzo.S
new file mode 100644 (file)
index 0000000..5921190
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+  initramfs_data includes the compressed binary that is the
+  filesystem used for early user space.
+  Note: Older versions of "as" (prior to binutils 2.11.90.0.23
+  released on 2001-07-14) did not support .incbin.
+  If you are forced to use binutils older than that, the
+  following trick can be applied to create the resulting binary:
+
+  ld -m elf_i386  --format binary --oformat elf32-i386 -r \
+  -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o
+   ld -m elf_i386  -r -o built-in.o initramfs_data.o
+
+  initramfs_data.scr looks like this:
+SECTIONS
+{
+       .init.ramfs : { *(.data) }
+}
+
+  The above example is for i386; the parameters vary between architectures.
+  If necessary, look up LDFLAGS_BLOB in an older version of
+  arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced.
+
+  Using .incbin has the advantage over ld that the correct flags are set
+  in the ELF header, as required by certain architectures.
+*/
+
+.section .init.ramfs,"a"
+.incbin "usr/initramfs_data.cpio.lzo"