[kernel] r15483 - in dists/sid/linux-2.6/debian: . config/kernelarch-x86 patches/features/all/ramzswap patches/series

Ben Hutchings benh at alioth.debian.org
Sun Apr 4 19:07:22 UTC 2010


Author: benh
Date: Sun Apr  4 19:07:12 2010
New Revision: 15483

Log:
[x86] Add ramzswap driver (Closes: #573912)

Added:
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add-TODO-file.patch
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add.patch
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-documentation.patch
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-remove-ARM-specific-d-cache-hack.patch
   dists/sid/linux-2.6/debian/patches/features/all/ramzswap/xvmalloc-memory-allocator.patch
Modified:
   dists/sid/linux-2.6/debian/changelog
   dists/sid/linux-2.6/debian/config/kernelarch-x86/config
   dists/sid/linux-2.6/debian/patches/series/11

Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog	Sun Apr  4 08:36:36 2010	(r15482)
+++ dists/sid/linux-2.6/debian/changelog	Sun Apr  4 19:07:12 2010	(r15483)
@@ -22,6 +22,7 @@
   * linux-base: Convert disk IDs in crypttab (Closes: #575056)
   * linux-base: Redirect stdin and stdout of child processes to avoid
     interfering with debconf (Closes: #574987)
+  * [x86] Add ramzswap driver (Closes: #573912)
 
   [ maximilian attems]
   * [alpha, hppa] Disable oprofile as tracing code is unsupported here.

Modified: dists/sid/linux-2.6/debian/config/kernelarch-x86/config
==============================================================================
--- dists/sid/linux-2.6/debian/config/kernelarch-x86/config	Sun Apr  4 08:36:36 2010	(r15482)
+++ dists/sid/linux-2.6/debian/config/kernelarch-x86/config	Sun Apr  4 19:07:12 2010	(r15483)
@@ -1122,6 +1122,12 @@
 CONFIG_POHMELFS_CRYPTO=y
 
 ##
+## file: drivers/staging/ramzswap/Kconfig
+##
+CONFIG_RAMZSWAP=m
+CONFIG_RAMZSWAP_STATS=y
+
+##
 ## file: drivers/staging/rt2860/Kconfig
 ##
 CONFIG_RT2860=m

Added: dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add-TODO-file.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add-TODO-file.patch	Sun Apr  4 19:07:12 2010	(r15483)
@@ -0,0 +1,29 @@
+From 224f0ef4e20327ab108e6a11ebc3a92f337c5e85 Mon Sep 17 00:00:00 2001
+From: Nitin Gupta <ngupta at vflare.org>
+Date: Tue, 22 Sep 2009 15:32:33 +0530
+Subject: [PATCH 4/5] Staging: ramzswap: add TODO file
+
+TODO file for ramzswap.
+
+Signed-off-by: Nitin Gupta <ngupta at vflare.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ drivers/staging/ramzswap/TODO |    6 ++++++
+ 1 files changed, 6 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/staging/ramzswap/TODO
+
+diff --git a/drivers/staging/ramzswap/TODO b/drivers/staging/ramzswap/TODO
+new file mode 100644
+index 0000000..bac40d6
+--- /dev/null
++++ b/drivers/staging/ramzswap/TODO
+@@ -0,0 +1,6 @@
++TODO:
++	- Add support for swap notifiers
++	- Remove CONFIG_ARM hack
++
++Please send patches to Greg Kroah-Hartman <greg at kroah.com> and
++Nitin Gupta <ngupta at vflare.org>
+-- 
+1.7.0.3
+

Added: dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-add.patch	Sun Apr  4 19:07:12 2010	(r15483)
@@ -0,0 +1,1769 @@
+From 306b0c957f3f0e7da6551652abbfe17b560173ce Mon Sep 17 00:00:00 2001
+From: Nitin Gupta <ngupta at vflare.org>
+Date: Tue, 22 Sep 2009 10:26:53 +0530
+Subject: [PATCH 2/5] Staging: virtual block device driver (ramzswap)
+
+Creates RAM based block devices (/dev/ramzswapX) which can be
+used (only) as swap disks. Pages swapped to these are compressed
+and stored in memory itself.
+
+The module is called ramzswap.ko. It depends on:
+ - xvmalloc memory allocator (compiled with this driver)
+ - lzo_compress.ko
+ - lzo_decompress.ko
+
+See ramzswap.txt for usage details.
+
+Signed-off-by: Nitin Gupta <ngupta at vflare.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+[bwh: Adjusted context for Debian's 2.6.32]
+---
+ drivers/staging/Kconfig                   |    2 +
+ drivers/staging/Makefile                  |    1 +
+ drivers/staging/ramzswap/Kconfig          |   21 +
+ drivers/staging/ramzswap/Makefile         |    3 +
+ drivers/staging/ramzswap/ramzswap_drv.c   | 1435 +++++++++++++++++++++++++++++
+ drivers/staging/ramzswap/ramzswap_drv.h   |  171 ++++
+ drivers/staging/ramzswap/ramzswap_ioctl.h |   49 +
+ 7 files changed, 1682 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/staging/ramzswap/Kconfig
+ create mode 100644 drivers/staging/ramzswap/Makefile
+ create mode 100644 drivers/staging/ramzswap/ramzswap_drv.c
+ create mode 100644 drivers/staging/ramzswap/ramzswap_drv.h
+ create mode 100644 drivers/staging/ramzswap/ramzswap_ioctl.h
+
+diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
+index 37ec213..f012304 100644
+--- a/drivers/staging/Kconfig
++++ b/drivers/staging/Kconfig
+@@ -123,6 +123,8 @@ source "drivers/staging/sep/Kconfig"
+ 
+ source "drivers/staging/iio/Kconfig"
+ 
++source "drivers/staging/ramzswap/Kconfig"
++
+ source "drivers/staging/speakup/Kconfig"
+ 
+ endif # !STAGING_EXCLUDE_BUILD
+diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
+index ff7222d..0c0dd53 100644
+--- a/drivers/staging/Makefile
++++ b/drivers/staging/Makefile
+@@ -43,4 +43,5 @@ obj-$(CONFIG_VME_BUS)		+= vme/
+ obj-$(CONFIG_RAR_REGISTER)	+= rar/
+ obj-$(CONFIG_DX_SEP)		+= sep/
+ obj-$(CONFIG_IIO)		+= iio/
++obj-$(CONFIG_RAMZSWAP)		+= ramzswap/
+ obj-$(CONFIG_SPEAKUP)		+= speakup/
+diff --git a/drivers/staging/ramzswap/Kconfig b/drivers/staging/ramzswap/Kconfig
+new file mode 100644
+index 0000000..24e2569
+--- /dev/null
++++ b/drivers/staging/ramzswap/Kconfig
+@@ -0,0 +1,21 @@
++config RAMZSWAP
++	tristate "Compressed in-memory swap device (ramzswap)"
++	depends on SWAP
++	select LZO_COMPRESS
++	select LZO_DECOMPRESS
++	default n
++	help
++	  Creates virtual block devices which can be used (only) as swap
++	  disks. Pages swapped to these disks are compressed and stored in
++	  memory itself.
++
++	  See ramzswap.txt for more information.
++	  Project home: http://compcache.googlecode.com/
++
++config RAMZSWAP_STATS
++	bool "Enable ramzswap stats"
++	depends on RAMZSWAP
++	default y
++	help
++	  Enable statistics collection for ramzswap. This adds only a minimal
++	  overhead. If unsure, say Y.
+diff --git a/drivers/staging/ramzswap/Makefile b/drivers/staging/ramzswap/Makefile
+new file mode 100644
+index 0000000..507d7dc
+--- /dev/null
++++ b/drivers/staging/ramzswap/Makefile
+@@ -0,0 +1,3 @@
++ramzswap-objs	:=	ramzswap_drv.o xvmalloc.o
++
++obj-$(CONFIG_RAMZSWAP)	+=	ramzswap.o
+diff --git a/drivers/staging/ramzswap/ramzswap_drv.c b/drivers/staging/ramzswap/ramzswap_drv.c
+new file mode 100644
+index 0000000..b839f05
+--- /dev/null
++++ b/drivers/staging/ramzswap/ramzswap_drv.c
+@@ -0,0 +1,1435 @@
++/*
++ * Compressed RAM based swap device
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ *
++ * Project home: http://compcache.googlecode.com
++ */
++
++#define KMSG_COMPONENT "ramzswap"
++#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/bitops.h>
++#include <linux/blkdev.h>
++#include <linux/buffer_head.h>
++#include <linux/device.h>
++#include <linux/genhd.h>
++#include <linux/highmem.h>
++#include <linux/lzo.h>
++#include <linux/mutex.h>
++#include <linux/string.h>
++#include <linux/swap.h>
++#include <linux/swapops.h>
++#include <linux/vmalloc.h>
++#include <linux/version.h>
++
++#include "ramzswap_drv.h"
++
++/* Globals */
++static int ramzswap_major;
++static struct ramzswap *devices;
++
++/*
++ * Pages that compress to larger than this size are
++ * forwarded to backing swap, if present or stored
++ * uncompressed in memory otherwise.
++ */
++static unsigned int max_zpage_size;
++
++/* Module params (documentation at end) */
++static unsigned int num_devices;
++
++static int rzs_test_flag(struct ramzswap *rzs, u32 index,
++			enum rzs_pageflags flag)
++{
++	return rzs->table[index].flags & BIT(flag);
++}
++
++static void rzs_set_flag(struct ramzswap *rzs, u32 index,
++			enum rzs_pageflags flag)
++{
++	rzs->table[index].flags |= BIT(flag);
++}
++
++static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
++			enum rzs_pageflags flag)
++{
++	rzs->table[index].flags &= ~BIT(flag);
++}
++
++static int page_zero_filled(void *ptr)
++{
++	unsigned int pos;
++	unsigned long *page;
++
++	page = (unsigned long *)ptr;
++
++	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
++		if (page[pos])
++			return 0;
++	}
++
++	return 1;
++}
++
++/*
++ * memlimit cannot be greater than backing disk size.
++ */
++static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
++{
++	int memlimit_valid = 1;
++
++	if (!rzs->memlimit) {
++		pr_info("Memory limit not set.\n");
++		memlimit_valid = 0;
++	}
++
++	if (rzs->memlimit > rzs->disksize) {
++		pr_info("Memory limit cannot be greater than "
++			"disksize: limit=%zu, disksize=%zu\n",
++			rzs->memlimit, rzs->disksize);
++		memlimit_valid = 0;
++	}
++
++	if (!memlimit_valid) {
++		size_t mempart, disksize;
++		pr_info("Using default: smaller of (%u%% of RAM) and "
++			"(backing disk size).\n",
++			default_memlimit_perc_ram);
++		mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
++		disksize = rzs->disksize;
++		rzs->memlimit = mempart > disksize ? disksize : mempart;
++	}
++
++	if (rzs->memlimit > totalram_bytes / 2) {
++		pr_info(
++		"Its not advisable setting limit more than half of "
++		"size of memory since we expect a 2:1 compression ratio. "
++		"Limit represents amount of *compressed* data we can keep "
++		"in memory!\n"
++		"\tMemory Size: %zu kB\n"
++		"\tLimit you selected: %zu kB\n"
++		"Continuing anyway ...\n",
++		totalram_bytes >> 10, rzs->memlimit >> 10
++		);
++	}
++
++	rzs->memlimit &= PAGE_MASK;
++	BUG_ON(!rzs->memlimit);
++}
++
++/*
++ * Settle rzs->disksize: if unset, default to default_disksize_perc_ram
++ * percent of totalram_bytes; log a note if it exceeds twice the RAM
++ * size; finally round it down to a multiple of PAGE_SIZE.
++ */
++static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
++{
++	if (!rzs->disksize) {
++		pr_info(
++		"disk size not provided. You can use disksize_kb module "
++		"param to specify size.\nUsing default: (%u%% of RAM).\n",
++		default_disksize_perc_ram);
++		rzs->disksize = default_disksize_perc_ram *
++					(totalram_bytes / 100);
++	}
++	if (rzs->disksize > 2 * (totalram_bytes)) {
++		pr_info(
++		"There is little point creating a ramzswap of greater than "
++		"twice the size of memory since we expect a 2:1 compression "
++		"ratio. Note that ramzswap uses about 0.1%% of the size of "
++		"the swap device when not in use so a huge ramzswap is "
++		"wasteful.\n\tMemory Size: %zu kB\n"
++		"\tSize you selected: %zu kB\nContinuing anyway ...\n",
++		/* both values are byte counts; print them in kB */
++		totalram_bytes >> 10, rzs->disksize >> 10);
++	}
++	rzs->disksize &= PAGE_MASK;
++}
++
++/*
++ * Swap header (1st page of swap device) contains information
++ * to identify it as a swap partition. Prepare such a header
++ * for ramzswap device (ramzswap0) so that swapon can identify
++ * it as swap partition. In case backing swap device is provided,
++ * copy its swap header.
++ * Returns 0 on success, negative errno otherwise.
++ */
++static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
++{
++	int ret = 0;
++	struct page *page;
++	struct address_space *mapping;
++	union swap_header *backing_swap_header;
++
++	/*
++	 * There is no backing swap device. Create a swap header
++	 * that is acceptable by swapon.
++	 */
++	if (!rzs->backing_swap) {
++		s->info.version = 1;
++		s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
++		s->info.nr_badpages = 0;
++		memcpy(s->magic.magic, "SWAPSPACE2", 10);
++		return 0;
++	}
++
++	/*
++	 * We have a backing swap device. Copy its swap header
++	 * to ramzswap device header. If this header contains
++	 * invalid information (backing device not a swap
++	 * partition, etc.), swapon will fail for ramzswap
++	 * which is correct behavior - we don't want to swap
++	 * over filesystem partition!
++	 */
++	/* Read the backing swap header (code from sys_swapon) */
++	mapping = rzs->swap_file->f_mapping;
++	if (!mapping->a_ops->readpage) {
++		ret = -EINVAL;
++		goto out;
++	}
++	page = read_mapping_page(mapping, 0, rzs->swap_file);
++	if (IS_ERR(page)) {
++		ret = PTR_ERR(page);
++		goto out;
++	}
++	backing_swap_header = kmap(page);
++	memcpy(s, backing_swap_header, sizeof(*s));
++	if (s->info.nr_badpages) {
++		pr_info("Cannot use backing swap with bad pages (%u)\n",
++			s->info.nr_badpages);
++		ret = -EINVAL;
++	}
++	/*
++	 * ramzswap disksize equals number of usable pages in backing
++	 * swap. Set last_page in swap header to match this disksize
++	 * ('last_page' means 0-based index of last usable swap page).
++	 */
++	s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
++	kunmap(page);
++	/* Drop the reference taken by read_mapping_page() */
++	page_cache_release(page);
++
++out:
++	return ret;
++}
++
++static void ramzswap_flush_dcache_page(struct page *page)
++{
++#ifdef CONFIG_ARM
++	int flag = 0;
++	/*
++	 * Ugly hack to get flush_dcache_page() work on ARM.
++	 * page_mapping(page) == NULL after clearing this swap cache flag.
++	 * Without clearing this flag, flush_dcache_page() will simply set
++	 * "PG_dcache_dirty" bit and return.
++	 */
++	if (PageSwapCache(page)) {
++		flag = 1;
++		ClearPageSwapCache(page);
++	}
++#endif
++	flush_dcache_page(page);
++#ifdef CONFIG_ARM
++	if (flag)
++		SetPageSwapCache(page);
++#endif
++}
++
++void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
++			struct ramzswap_ioctl_stats *s)
++{
++	strncpy(s->backing_swap_name, rzs->backing_swap_name,
++		MAX_SWAP_NAME_LEN - 1);
++	s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
++
++	s->disksize = rzs->disksize;
++	s->memlimit = rzs->memlimit;
++
++#if defined(CONFIG_RAMZSWAP_STATS)
++	{
++	struct ramzswap_stats *rs = &rzs->stats;
++	size_t succ_writes, mem_used;
++	unsigned int good_compress_perc = 0, no_compress_perc = 0;
++
++	mem_used = xv_get_total_size_bytes(rzs->mem_pool)
++			+ (rs->pages_expand << PAGE_SHIFT);
++	succ_writes = rs->num_writes - rs->failed_writes;
++
++	if (succ_writes && rs->pages_stored) {
++		good_compress_perc = rs->good_compress * 100
++					/ rs->pages_stored;
++		no_compress_perc = rs->pages_expand * 100
++					/ rs->pages_stored;
++	}
++
++	s->num_reads = rs->num_reads;
++	s->num_writes = rs->num_writes;
++	s->failed_reads = rs->failed_reads;
++	s->failed_writes = rs->failed_writes;
++	s->invalid_io = rs->invalid_io;
++	s->pages_zero = rs->pages_zero;
++
++	s->good_compress_pct = good_compress_perc;
++	s->pages_expand_pct = no_compress_perc;
++
++	s->pages_stored = rs->pages_stored;
++	s->pages_used = mem_used >> PAGE_SHIFT;
++	s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
++	s->compr_data_size = rs->compr_size;
++	s->mem_used_total = mem_used;
++
++	s->bdev_num_reads = rs->bdev_num_reads;
++	s->bdev_num_writes = rs->bdev_num_writes;
++	}
++#endif /* CONFIG_RAMZSWAP_STATS */
++}
++
++static int add_backing_swap_extent(struct ramzswap *rzs,
++				pgoff_t phy_pagenum,
++				pgoff_t num_pages)
++{
++	unsigned int idx;
++	struct list_head *head;
++	struct page *curr_page, *new_page;
++	unsigned int extents_per_page = PAGE_SIZE /
++				sizeof(struct ramzswap_backing_extent);
++
++	idx = rzs->num_extents % extents_per_page;
++	if (!idx) {
++		new_page = alloc_page(__GFP_ZERO);
++		if (!new_page)
++			return -ENOMEM;
++
++		if (rzs->num_extents) {
++			curr_page = virt_to_page(rzs->curr_extent);
++			head = &curr_page->lru;
++		} else {
++			head = &rzs->backing_swap_extent_list;
++		}
++
++		list_add(&new_page->lru, head);
++		rzs->curr_extent = page_address(new_page);
++	}
++
++	rzs->curr_extent->phy_pagenum = phy_pagenum;
++	rzs->curr_extent->num_pages = num_pages;
++
++	pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
++		"pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
++		phy_pagenum + num_pages - 1, rzs->curr_extent);
++
++	if (idx != extents_per_page - 1)
++		rzs->curr_extent++;
++
++	return 0;
++}
++
++static int setup_backing_swap_extents(struct ramzswap *rzs,
++				struct inode *inode, unsigned long *num_pages)
++{
++	int ret = 0;
++	unsigned blkbits;
++	unsigned blocks_per_page;
++	pgoff_t contig_pages = 0, total_pages = 0;
++	pgoff_t pagenum = 0, prev_pagenum = 0;
++	sector_t probe_block = 0;
++	sector_t last_block;
++
++	blkbits = inode->i_blkbits;
++	blocks_per_page = PAGE_SIZE >> blkbits;
++
++	last_block = i_size_read(inode) >> blkbits;
++	while (probe_block + blocks_per_page <= last_block) {
++		unsigned block_in_page;
++		sector_t first_block;
++
++		first_block = bmap(inode, probe_block);
++		if (first_block == 0)
++			goto bad_bmap;
++
++		/* It must be PAGE_SIZE aligned on-disk */
++		if (first_block & (blocks_per_page - 1)) {
++			probe_block++;
++			goto probe_next;
++		}
++
++		/* All blocks within this page must be contiguous on disk */
++		for (block_in_page = 1; block_in_page < blocks_per_page;
++					block_in_page++) {
++			sector_t block;
++
++			block = bmap(inode, probe_block + block_in_page);
++			if (block == 0)
++				goto bad_bmap;
++			if (block != first_block + block_in_page) {
++				/* Discontiguity */
++				probe_block++;
++				goto probe_next;
++			}
++		}
++
++		/*
++		 * We found a PAGE_SIZE length, PAGE_SIZE aligned
++		 * run of blocks.
++		 */
++		pagenum = first_block >> (PAGE_SHIFT - blkbits);
++
++		if (total_pages && (pagenum != prev_pagenum + 1)) {
++			ret = add_backing_swap_extent(rzs, prev_pagenum -
++					(contig_pages - 1), contig_pages);
++			if (ret < 0)
++				goto out;
++			rzs->num_extents++;
++			contig_pages = 0;
++		}
++		total_pages++;
++		contig_pages++;
++		prev_pagenum = pagenum;
++		probe_block += blocks_per_page;
++
++probe_next:
++		continue;
++	}
++
++	if (contig_pages) {
++		pr_debug("adding last extent: pagenum=%lu, "
++			"contig_pages=%lu\n", pagenum, contig_pages);
++		ret = add_backing_swap_extent(rzs,
++			prev_pagenum - (contig_pages - 1), contig_pages);
++		if (ret < 0)
++			goto out;
++		rzs->num_extents++;
++	}
++	if (!rzs->num_extents) {
++		pr_err("No swap extents found!\n");
++		ret = -EINVAL;
++	}
++
++	if (!ret) {
++		*num_pages = total_pages;
++		pr_info("Found %lu extents containing %luk\n",
++			rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
++	}
++	goto out;
++
++bad_bmap:
++	pr_err("Backing swapfile has holes\n");
++	ret = -EINVAL;
++out:
++	while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
++		struct page *page;
++		struct list_head *entry = rzs->backing_swap_extent_list.next;
++		page = list_entry(entry, struct page, lru);
++		list_del(entry);
++		__free_page(page);
++	}
++	return ret;
++}
++
++static void map_backing_swap_extents(struct ramzswap *rzs)
++{
++	struct ramzswap_backing_extent *se;
++	struct page *table_page, *se_page;
++	unsigned long num_pages, num_table_pages, entry;
++	unsigned long se_idx, span;
++	unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
++	unsigned extents_per_page = PAGE_SIZE / sizeof(*se);
++
++	/* True for block device */
++	if (!rzs->num_extents)
++		return;
++
++	se_page = list_entry(rzs->backing_swap_extent_list.next,
++					struct page, lru);
++	se = page_address(se_page);
++	span = se->num_pages;
++	num_pages = rzs->disksize >> PAGE_SHIFT;
++	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
++							PAGE_SIZE);
++
++	entry = 0;
++	se_idx = 0;
++	while (num_table_pages--) {
++		table_page = vmalloc_to_page(&rzs->table[entry]);
++		while (span <= entry) {
++			se_idx++;
++			if (se_idx == rzs->num_extents)
++				BUG();
++
++			if (!(se_idx % extents_per_page)) {
++				se_page = list_entry(se_page->lru.next,
++						struct page, lru);
++				se = page_address(se_page);
++			} else
++				se++;
++
++			span += se->num_pages;
++		}
++		table_page->mapping = (struct address_space *)se;
++		table_page->private = se->num_pages - (span - entry);
++		pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
++			entry, span, table_page->mapping, table_page->private);
++		entry += entries_per_page;
++	}
++}
++
++/*
++ * Check if value of backing_swap module param is sane.
++ * A block device is claimed and its size becomes the ramzswap size;
++ * a regular file is scanned for page-aligned extents instead.
++ * Returns 0 on success or when no backing swap is used, else -errno.
++ */
++static int setup_backing_swap(struct ramzswap *rzs)
++{
++	int ret = 0;
++	size_t disksize;
++	unsigned long num_pages = 0;
++	struct inode *inode;
++	struct file *swap_file;
++	struct address_space *mapping;
++	struct block_device *bdev = NULL;
++
++	if (!rzs->backing_swap_name[0]) {
++		pr_debug("backing_swap param not given\n");
++		goto out;
++	}
++	pr_info("Using backing swap device: %s\n", rzs->backing_swap_name);
++	swap_file = filp_open(rzs->backing_swap_name, O_RDWR | O_LARGEFILE, 0);
++	if (IS_ERR(swap_file)) {
++		pr_err("Error opening backing device: %s\n",
++			rzs->backing_swap_name);
++		ret = -EINVAL;
++		goto out;
++	}
++
++	mapping = swap_file->f_mapping;
++	inode = mapping->host;
++
++	if (S_ISBLK(inode->i_mode)) {
++		bdev = I_BDEV(inode);
++		ret = bd_claim(bdev, setup_backing_swap);
++		if (ret < 0) {
++			bdev = NULL;
++			goto bad_param;
++		}
++		disksize = i_size_read(inode);
++	} else if (S_ISREG(inode->i_mode)) {
++		/* Filesystem's device: used for I/O only, never claimed here */
++		bdev = inode->i_sb->s_bdev;
++		if (IS_SWAPFILE(inode)) {
++			ret = -EBUSY;
++			goto bad_param;
++		}
++		ret = setup_backing_swap_extents(rzs, inode, &num_pages);
++		if (ret < 0)
++			goto bad_param;
++		disksize = num_pages << PAGE_SHIFT;
++	} else {
++		/* NOTE(review): unsupported file type leaves ret == 0 here */
++		goto bad_param;
++	}
++
++	rzs->swap_file = swap_file;
++	rzs->backing_swap = bdev;
++	rzs->disksize = disksize;
++	BUG_ON(!rzs->disksize);
++	return 0;
++
++bad_param:
++	/* Only drop the claim taken by bd_claim() in the S_ISBLK branch */
++	if (bdev && S_ISBLK(inode->i_mode))
++		bd_release(bdev);
++	filp_close(swap_file, NULL);
++
++out:
++	rzs->backing_swap = NULL;
++	return ret;
++}
++
++/*
++ * Map logical page number 'pagenum' to physical page number
++ * on backing swap device. For block device, this is a nop.
++ *
++ * For a swap file, the table page holding entry 'pagenum' records (in
++ * ->mapping and ->private) the extent, and the offset within it, of the
++ * first entry on that table page; the extent list is walked from there.
++ */
++u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
++{
++	u32 skip_pages, entries_per_page;
++	size_t delta, se_offset, skipped;
++	struct page *table_page, *se_page;
++	struct ramzswap_backing_extent *se;
++
++	if (!rzs->num_extents)
++		return pagenum;
++
++	entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
++	table_page = vmalloc_to_page(&rzs->table[pagenum]);
++	se = (struct ramzswap_backing_extent *)table_page->mapping;
++	se_page = virt_to_page(se);
++
++	/* Index of 'pagenum' relative to first entry of its table page */
++	skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
++	se_offset = table_page->private + skip_pages;
++	if (se_offset < se->num_pages)
++		return se->phy_pagenum + se_offset;
++
++	/* 'skipped' = pages covered by the extents visited so far */
++	skipped = se->num_pages - table_page->private;
++	do {
++		struct ramzswap_backing_extent *se_base;
++		u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);
++
++		/* Get next swap extent */
++		se_base = page_address(se_page);
++		if (se - se_base == se_entries_per_page - 1) {
++			se_page = list_entry(se_page->lru.next,
++						struct page, lru);
++			se = page_address(se_page);
++		} else {
++			se++;
++		}
++		skipped += se->num_pages;
++		/* if equal, target is the first page of the next extent */
++	} while (skipped <= skip_pages);
++	delta = skipped - skip_pages;
++	return se->phy_pagenum + (se->num_pages - delta);
++}
++
++static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
++{
++	u32 clen;
++	void *obj;
++
++	struct page *page = rzs->table[index].page;
++	u32 offset = rzs->table[index].offset;
++
++	if (unlikely(!page)) {
++		if (rzs_test_flag(rzs, index, RZS_ZERO)) {
++			rzs_clear_flag(rzs, index, RZS_ZERO);
++			stat_dec(rzs->stats.pages_zero);
++		}
++		return;
++	}
++
++	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
++		clen = PAGE_SIZE;
++		__free_page(page);
++		rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
++		stat_dec(rzs->stats.pages_expand);
++		goto out;
++	}
++
++	obj = kmap_atomic(page, KM_USER0) + offset;
++	clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
++	kunmap_atomic(obj, KM_USER0);
++
++	xv_free(rzs->mem_pool, page, offset);
++	if (clen <= PAGE_SIZE / 2)
++		stat_dec(rzs->stats.good_compress);
++
++out:
++	rzs->stats.compr_size -= clen;
++	stat_dec(rzs->stats.pages_stored);
++
++	rzs->table[index].page = NULL;
++	rzs->table[index].offset = 0;
++}
++
++static int handle_zero_page(struct bio *bio)
++{
++	void *user_mem;
++	struct page *page = bio->bi_io_vec[0].bv_page;
++
++	user_mem = kmap_atomic(page, KM_USER0);
++	memset(user_mem, 0, PAGE_SIZE);
++	kunmap_atomic(user_mem, KM_USER0);
++
++	ramzswap_flush_dcache_page(page);
++
++	set_bit(BIO_UPTODATE, &bio->bi_flags);
++	bio_endio(bio, 0);
++	return 0;
++}
++
++static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
++{
++	u32 index;
++	struct page *page;
++	unsigned char *user_mem, *cmem;
++
++	page = bio->bi_io_vec[0].bv_page;
++	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
++
++	user_mem = kmap_atomic(page, KM_USER0);
++	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
++			rzs->table[index].offset;
++
++	memcpy(user_mem, cmem, PAGE_SIZE);
++	kunmap_atomic(user_mem, KM_USER0);
++	kunmap_atomic(cmem, KM_USER1);
++
++	ramzswap_flush_dcache_page(page);
++
++	set_bit(BIO_UPTODATE, &bio->bi_flags);
++	bio_endio(bio, 0);
++	return 0;
++}
++
++
++/*
++ * Called when request page is not present in ramzswap.
++ * It is either in backing swap device (if present) or
++ * this is an attempt to read before any previous write
++ * to this location - this happens due to readahead when
++ * swap device is read from user-space (e.g. during swapon)
++ */
++static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
++{
++	/*
++	 * Always forward such requests to backing swap
++	 * device (if present)
++	 */
++	if (rzs->backing_swap) {
++		u32 pagenum;
++		stat_dec(rzs->stats.num_reads);
++		stat_inc(rzs->stats.bdev_num_reads);
++		bio->bi_bdev = rzs->backing_swap;
++
++		/*
++		 * In case backing swap is a file, find the right offset within
++		 * the file corresponding to logical position 'index'. For block
++		 * device, this is a nop.
++		 */
++		pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
++		bio->bi_sector = map_backing_swap_page(rzs, pagenum)
++					<< SECTORS_PER_PAGE_SHIFT;
++		return 1;
++	}
++
++	/*
++	 * It's an unlikely event in case backing dev is
++	 * not present
++	 */
++	pr_debug("Read before write on swap device: "
++		"sector=%lu, size=%u, offset=%u\n",
++		(ulong)(bio->bi_sector), bio->bi_size,
++		bio->bi_io_vec[0].bv_offset);
++
++	/* Do nothing. Just return success */
++	set_bit(BIO_UPTODATE, &bio->bi_flags);
++	bio_endio(bio, 0);
++	return 0;
++}
++
++static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
++{
++	int ret;
++	u32 index;
++	size_t clen;
++	struct page *page;
++	struct zobj_header *zheader;
++	unsigned char *user_mem, *cmem;
++
++	stat_inc(rzs->stats.num_reads);
++
++	page = bio->bi_io_vec[0].bv_page;
++	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
++
++	if (rzs_test_flag(rzs, index, RZS_ZERO))
++		return handle_zero_page(bio);
++
++	/* Requested page is not present in compressed area */
++	if (!rzs->table[index].page)
++		return handle_ramzswap_fault(rzs, bio);
++
++	/* Page is stored uncompressed since it's incompressible */
++	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
++		return handle_uncompressed_page(rzs, bio);
++
++	user_mem = kmap_atomic(page, KM_USER0);
++	clen = PAGE_SIZE;
++
++	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
++			rzs->table[index].offset;
++
++	ret = lzo1x_decompress_safe(
++		cmem + sizeof(*zheader),
++		xv_get_object_size(cmem) - sizeof(*zheader),
++		user_mem, &clen);
++
++	kunmap_atomic(user_mem, KM_USER0);
++	kunmap_atomic(cmem, KM_USER1);
++
++	/* should NEVER happen */
++	if (unlikely(ret != LZO_E_OK)) {
++		pr_err("Decompression failed! err=%d, page=%u\n",
++			ret, index);
++		stat_inc(rzs->stats.failed_reads);
++		goto out;
++	}
++
++	ramzswap_flush_dcache_page(page);
++
++	set_bit(BIO_UPTODATE, &bio->bi_flags);
++	bio_endio(bio, 0);
++	return 0;
++
++out:
++	bio_io_error(bio);
++	return 0;
++}
++
++static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
++{
++	int ret, fwd_write_request = 0;
++	u32 offset, index;
++	size_t clen;
++	struct zobj_header *zheader;
++	struct page *page, *page_store;
++	unsigned char *user_mem, *cmem, *src;
++
++	stat_inc(rzs->stats.num_writes);
++
++	page = bio->bi_io_vec[0].bv_page;
++	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
++
++	src = rzs->compress_buffer;
++
++	/*
++	 * System swaps to same sector again when the stored page
++	 * is no longer referenced by any process. So, it's now safe
++	 * to free the memory that was allocated for this page.
++	 */
++	if (rzs->table[index].page)
++		ramzswap_free_page(rzs, index);
++
++	/*
++	 * No memory is allocated for zero filled pages.
++	 * Simply clear zero page flag.
++	 */
++	if (rzs_test_flag(rzs, index, RZS_ZERO)) {
++		stat_dec(rzs->stats.pages_zero);
++		rzs_clear_flag(rzs, index, RZS_ZERO);
++	}
++
++	mutex_lock(&rzs->lock);
++
++	user_mem = kmap_atomic(page, KM_USER0);
++	if (page_zero_filled(user_mem)) {
++		kunmap_atomic(user_mem, KM_USER0);
++		mutex_unlock(&rzs->lock);
++		stat_inc(rzs->stats.pages_zero);
++		rzs_set_flag(rzs, index, RZS_ZERO);
++
++		set_bit(BIO_UPTODATE, &bio->bi_flags);
++		bio_endio(bio, 0);
++		return 0;
++	}
++
++	if (rzs->backing_swap &&
++		(rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
++		kunmap_atomic(user_mem, KM_USER0);
++		mutex_unlock(&rzs->lock);
++		fwd_write_request = 1;
++		goto out;
++	}
++
++	ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
++				rzs->compress_workmem);
++
++	kunmap_atomic(user_mem, KM_USER0);
++
++	if (unlikely(ret != LZO_E_OK)) {
++		mutex_unlock(&rzs->lock);
++		pr_err("Compression failed! err=%d\n", ret);
++		stat_inc(rzs->stats.failed_writes);
++		goto out;
++	}
++
++	/*
++	 * Page is incompressible. Forward it to backing swap
++	 * if present. Otherwise, store it as-is (uncompressed)
++	 * since we do not want to return too many swap write
++	 * errors which has side effect of hanging the system.
++	 */
++	if (unlikely(clen > max_zpage_size)) {
++		if (rzs->backing_swap) {
++			mutex_unlock(&rzs->lock);
++			fwd_write_request = 1;
++			goto out;
++		}
++
++		clen = PAGE_SIZE;
++		page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
++		if (unlikely(!page_store)) {
++			mutex_unlock(&rzs->lock);
++			pr_info("Error allocating memory for incompressible "
++				"page: %u\n", index);
++			stat_inc(rzs->stats.failed_writes);
++			goto out;
++		}
++
++		offset = 0;
++		rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
++		stat_inc(rzs->stats.pages_expand);
++		rzs->table[index].page = page_store;
++		src = kmap_atomic(page, KM_USER0);
++		goto memstore;
++	}
++
++	if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
++			&rzs->table[index].page, &offset,
++			GFP_NOIO | __GFP_HIGHMEM)) {
++		mutex_unlock(&rzs->lock);
++		pr_info("Error allocating memory for compressed "
++			"page: %u, size=%zu\n", index, clen);
++		stat_inc(rzs->stats.failed_writes);
++		if (rzs->backing_swap)
++			fwd_write_request = 1;
++		goto out;
++	}
++
++memstore:
++	rzs->table[index].offset = offset;
++
++	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
++			rzs->table[index].offset;
++
++#if 0
++	/* Back-reference needed for memory defragmentation */
++	if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
++		zheader = (struct zobj_header *)cmem;
++		zheader->table_idx = index;
++		cmem += sizeof(*zheader);
++	}
++#endif
++
++	memcpy(cmem, src, clen);
++
++	kunmap_atomic(cmem, KM_USER1);
++	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
++		kunmap_atomic(src, KM_USER0);
++
++	/* Update stats */
++	rzs->stats.compr_size += clen;
++	stat_inc(rzs->stats.pages_stored);
++	if (clen <= PAGE_SIZE / 2)
++		stat_inc(rzs->stats.good_compress);
++
++	mutex_unlock(&rzs->lock);
++
++	set_bit(BIO_UPTODATE, &bio->bi_flags);
++	bio_endio(bio, 0);
++	return 0;
++
++out:
++	if (fwd_write_request) {
++		stat_inc(rzs->stats.bdev_num_writes);
++		bio->bi_bdev = rzs->backing_swap;
++#if 0
++		/*
++		 * TODO: We currently have linear mapping of ramzswap and
++		 * backing swap sectors. This is not desired since we want
++		 * to optimize writes to backing swap to minimize disk seeks
++		 * or have effective wear leveling (for SSDs). Also, a
++		 * non-linear mapping is required to implement compressed
++		 * on-disk swapping.
++		 */
++		 bio->bi_sector = get_backing_swap_page()
++					<< SECTORS_PER_PAGE_SHIFT;
++#endif
++		/*
++		 * In case backing swap is a file, find the right offset within
++		 * the file corresponding to logical position 'index'. For block
++		 * device, this is a nop.
++		 */
++		bio->bi_sector = map_backing_swap_page(rzs, index)
++					<< SECTORS_PER_PAGE_SHIFT;
++		return 1;
++	}
++
++	bio_io_error(bio);
++	return 0;
++}
++
++
++/*
++ * Check if request is within bounds and page aligned.
++ */
++static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
++{
++	if (unlikely(
++		(bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
++		(bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
++		(bio->bi_vcnt != 1) ||
++		(bio->bi_size != PAGE_SIZE) ||
++		(bio->bi_io_vec[0].bv_offset != 0))) {
++
++		return 0;
++	}
++
++	/* swap request is valid */
++	return 1;
++}
++
++/*
++ * Handler function for all ramzswap I/O requests.
++ */
++static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
++{
++	int ret = 0;
++	struct ramzswap *rzs = queue->queuedata;
++
++	if (unlikely(!rzs->init_done)) {
++		bio_io_error(bio);
++		return 0;
++	}
++
++	if (!valid_swap_request(rzs, bio)) {
++		stat_inc(rzs->stats.invalid_io);
++		bio_io_error(bio);
++		return 0;
++	}
++
++	switch (bio_data_dir(bio)) {
++	case READ:
++		ret = ramzswap_read(rzs, bio);
++		break;
++
++	case WRITE:
++		ret = ramzswap_write(rzs, bio);
++		break;
++	}
++
++	return ret;
++}
++
++static void reset_device(struct ramzswap *rzs)
++{
++	int is_backing_blkdev = 0;
++	size_t index, num_pages;
++	unsigned entries_per_page;
++	unsigned long num_table_pages, entry = 0;
++
++	if (rzs->backing_swap && !rzs->num_extents)
++		is_backing_blkdev = 1;
++
++	num_pages = rzs->disksize >> PAGE_SHIFT;
++
++	/* Free various per-device buffers */
++	kfree(rzs->compress_workmem);
++	free_pages((unsigned long)rzs->compress_buffer, 1);
++
++	rzs->compress_workmem = NULL;
++	rzs->compress_buffer = NULL;
++
++	/* Free all pages that are still in this ramzswap device */
++	for (index = 0; index < num_pages; index++) {
++		struct page *page;
++		u16 offset;
++
++		page = rzs->table[index].page;
++		offset = rzs->table[index].offset;
++
++		if (!page)
++			continue;
++
++		if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
++			__free_page(page);
++		else
++			xv_free(rzs->mem_pool, page, offset);
++	}
++
++	entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
++	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
++					PAGE_SIZE);
++	/*
++	 * Set page->mapping to NULL for every table page.
++	 * Otherwise, we will hit bad_page() during free.
++	 */
++	while (rzs->num_extents && num_table_pages--) {
++		struct page *page;
++		page = vmalloc_to_page(&rzs->table[entry]);
++		page->mapping = NULL;
++		entry += entries_per_page;
++	}
++	vfree(rzs->table);
++	rzs->table = NULL;
++
++	xv_destroy_pool(rzs->mem_pool);
++	rzs->mem_pool = NULL;
++
++	/* Free all swap extent pages */
++	while (!list_empty(&rzs->backing_swap_extent_list)) {
++		struct page *page;
++		struct list_head *entry;
++		entry = rzs->backing_swap_extent_list.next;
++		page = list_entry(entry, struct page, lru);
++		list_del(entry);
++		__free_page(page);
++	}
++	INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
++	rzs->num_extents = 0;
++
++	/* Close backing swap device, if present */
++	if (rzs->backing_swap) {
++		if (is_backing_blkdev)
++			bd_release(rzs->backing_swap);
++		filp_close(rzs->swap_file, NULL);
++		rzs->backing_swap = NULL;
++	}
++
++	/* Reset stats */
++	memset(&rzs->stats, 0, sizeof(rzs->stats));
++
++	rzs->disksize = 0;
++	rzs->memlimit = 0;
++
++	/* Back to uninitialized state */
++	rzs->init_done = 0;
++}
++
++static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
++{
++	int ret;
++	size_t num_pages;
++	struct page *page;
++	union swap_header *swap_header;
++
++	if (rzs->init_done) {
++		pr_info("Device already initialized!\n");
++		return -EBUSY;
++	}
++
++	ret = setup_backing_swap(rzs);
++	if (ret)
++		goto fail;
++
++	if (rzs->backing_swap)
++		ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
++	else
++		ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
++
++	rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
++	if (!rzs->compress_workmem) {
++		pr_err("Error allocating compressor working memory!\n");
++		ret = -ENOMEM;
++		goto fail;
++	}
++
++	rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
++	if (!rzs->compress_buffer) {
++		pr_err("Error allocating compressor buffer space\n");
++		ret = -ENOMEM;
++		goto fail;
++	}
++
++	num_pages = rzs->disksize >> PAGE_SHIFT;
++	rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
++	if (!rzs->table) {
++		pr_err("Error allocating ramzswap address table\n");
++		/* To prevent accessing table entries during cleanup */
++		rzs->disksize = 0;
++		ret = -ENOMEM;
++		goto fail;
++	}
++	memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
++
++	map_backing_swap_extents(rzs);
++
++	page = alloc_page(__GFP_ZERO);
++	if (!page) {
++		pr_err("Error allocating swap header page\n");
++		ret = -ENOMEM;
++		goto fail;
++	}
++	rzs->table[0].page = page;
++	rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
++
++	swap_header = kmap(page);
++	ret = setup_swap_header(rzs, swap_header);
++	kunmap(page);
++	if (ret) {
++		pr_err("Error setting swap header\n");
++		goto fail;
++	}
++
++	set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
++
++	/*
++	 * We have an identity mapping of sectors between ramzswap
++	 * and the backing swap device. So, this queue flag
++	 * should be set according to the backing dev.
++	 */
++	if (!rzs->backing_swap ||
++			blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
++		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
++
++	rzs->mem_pool = xv_create_pool();
++	if (!rzs->mem_pool) {
++		pr_err("Error creating memory pool\n");
++		ret = -ENOMEM;
++		goto fail;
++	}
++
++	/*
++	 * Pages that compress to size greater than this are forwarded
++	 * to physical swap disk (if backing dev is provided)
++	 * TODO: make this configurable
++	 */
++	if (rzs->backing_swap)
++		max_zpage_size = max_zpage_size_bdev;
++	else
++		max_zpage_size = max_zpage_size_nobdev;
++	pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);
++
++	rzs->init_done = 1;
++
++	pr_debug("Initialization done!\n");
++	return 0;
++
++fail:
++	reset_device(rzs);
++
++	pr_err("Initialization failed: err=%d\n", ret);
++	return ret;
++}
++
++static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
++{
++	if (rzs->init_done)
++		reset_device(rzs);
++
++	return 0;
++}
++
++static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
++			unsigned int cmd, unsigned long arg)
++{
++	int ret = 0;
++	size_t disksize_kb, memlimit_kb;
++
++	struct ramzswap *rzs = bdev->bd_disk->private_data;
++
++	switch (cmd) {
++	case RZSIO_SET_DISKSIZE_KB:
++		if (rzs->init_done) {
++			ret = -EBUSY;
++			goto out;
++		}
++		if (copy_from_user(&disksize_kb, (void *)arg,
++						_IOC_SIZE(cmd))) {
++			ret = -EFAULT;
++			goto out;
++		}
++		rzs->disksize = disksize_kb << 10;
++		pr_info("Disk size set to %zu kB\n", disksize_kb);
++		break;
++
++	case RZSIO_SET_MEMLIMIT_KB:
++		if (rzs->init_done) {
++			/* TODO: allow changing memlimit */
++			ret = -EBUSY;
++			goto out;
++		}
++		if (copy_from_user(&memlimit_kb, (void *)arg,
++						_IOC_SIZE(cmd))) {
++			ret = -EFAULT;
++			goto out;
++		}
++		rzs->memlimit = memlimit_kb << 10;
++		pr_info("Memory limit set to %zu kB\n", memlimit_kb);
++		break;
++
++	case RZSIO_SET_BACKING_SWAP:
++		if (rzs->init_done) {
++			ret = -EBUSY;
++			goto out;
++		}
++
++		if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
++						_IOC_SIZE(cmd))) {
++			ret = -EFAULT;
++			goto out;
++		}
++		rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
++		pr_info("Backing swap set to %s\n", rzs->backing_swap_name);
++		break;
++
++	case RZSIO_GET_STATS:
++	{
++		struct ramzswap_ioctl_stats *stats;
++		if (!rzs->init_done) {
++			ret = -ENOTTY;
++			goto out;
++		}
++		stats = kzalloc(sizeof(*stats), GFP_KERNEL);
++		if (!stats) {
++			ret = -ENOMEM;
++			goto out;
++		}
++		ramzswap_ioctl_get_stats(rzs, stats);
++		if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
++			kfree(stats);
++			ret = -EFAULT;
++			goto out;
++		}
++		kfree(stats);
++		break;
++	}
++	case RZSIO_INIT:
++		ret = ramzswap_ioctl_init_device(rzs);
++		break;
++
++	case RZSIO_RESET:
++		/* Do not reset an active device! */
++		if (bdev->bd_holders) {
++			ret = -EBUSY;
++			goto out;
++		}
++		ret = ramzswap_ioctl_reset_device(rzs);
++		break;
++
++	default:
++		pr_info("Invalid ioctl %u\n", cmd);
++		ret = -ENOTTY;
++	}
++
++out:
++	return ret;
++}
++
++static struct block_device_operations ramzswap_devops = {
++	.ioctl = ramzswap_ioctl,
++	.owner = THIS_MODULE,
++};
++
++static void create_device(struct ramzswap *rzs, int device_id)
++{
++	mutex_init(&rzs->lock);
++	INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
++
++	rzs->queue = blk_alloc_queue(GFP_KERNEL);
++	if (!rzs->queue) {
++		pr_err("Error allocating disk queue for device %d\n",
++			device_id);
++		return;
++	}
++
++	blk_queue_make_request(rzs->queue, ramzswap_make_request);
++	rzs->queue->queuedata = rzs;
++
++	 /* gendisk structure */
++	rzs->disk = alloc_disk(1);
++	if (!rzs->disk) {
++		blk_cleanup_queue(rzs->queue);
++		pr_warning("Error allocating disk structure for device %d\n",
++			device_id);
++		return;
++	}
++
++	rzs->disk->major = ramzswap_major;
++	rzs->disk->first_minor = device_id;
++	rzs->disk->fops = &ramzswap_devops;
++	rzs->disk->queue = rzs->queue;
++	rzs->disk->private_data = rzs;
++	snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
++
++	/*
++	 * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
++	 * or set equal to backing swap device (if provided)
++	 */
++	set_capacity(rzs->disk, 0);
++	add_disk(rzs->disk);
++
++	rzs->init_done = 0;
++}
++
++static void destroy_device(struct ramzswap *rzs)
++{
++	if (rzs->disk) {
++		del_gendisk(rzs->disk);
++		put_disk(rzs->disk);
++	}
++
++	if (rzs->queue)
++		blk_cleanup_queue(rzs->queue);
++}
++
++static int __init ramzswap_init(void)
++{
++	int i, ret;
++
++	if (num_devices > max_num_devices) {
++		pr_warning("Invalid value for num_devices: %u\n",
++				num_devices);
++		return -EINVAL;
++	}
++
++	ramzswap_major = register_blkdev(0, "ramzswap");
++	if (ramzswap_major <= 0) {
++		pr_warning("Unable to get major number\n");
++		return -EBUSY;
++	}
++
++	if (!num_devices) {
++		pr_info("num_devices not specified. Using default: 1\n");
++		num_devices = 1;
++	}
++
++	/* Allocate the device array and initialize each one */
++	pr_info("Creating %u devices ...\n", num_devices);
++	devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
++	if (!devices) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	for (i = 0; i < num_devices; i++)
++		create_device(&devices[i], i);
++
++	return 0;
++out:
++	unregister_blkdev(ramzswap_major, "ramzswap");
++	return ret;
++}
++
++static void __exit ramzswap_exit(void)
++{
++	int i;
++	struct ramzswap *rzs;
++
++	for (i = 0; i < num_devices; i++) {
++		rzs = &devices[i];
++
++		destroy_device(rzs);
++		if (rzs->init_done)
++			reset_device(rzs);
++	}
++
++	unregister_blkdev(ramzswap_major, "ramzswap");
++
++	kfree(devices);
++	pr_debug("Cleanup done!\n");
++}
++
++module_param(num_devices, uint, 0);
++MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
++
++module_init(ramzswap_init);
++module_exit(ramzswap_exit);
++
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_AUTHOR("Nitin Gupta <ngupta at vflare.org>");
++MODULE_DESCRIPTION("Compressed RAM Based Swap Device");
+diff --git a/drivers/staging/ramzswap/ramzswap_drv.h b/drivers/staging/ramzswap/ramzswap_drv.h
+new file mode 100644
+index 0000000..a6ea240
+--- /dev/null
++++ b/drivers/staging/ramzswap/ramzswap_drv.h
+@@ -0,0 +1,171 @@
++/*
++ * Compressed RAM based swap device
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ *
++ * Project home: http://compcache.googlecode.com
++ */
++
++#ifndef _RAMZSWAP_DRV_H_
++#define _RAMZSWAP_DRV_H_
++
++#include "ramzswap_ioctl.h"
++#include "xvmalloc.h"
++
++/*
++ * Some arbitrary value. This is just to catch
++ * invalid value for num_devices module parameter.
++ */
++static const unsigned max_num_devices = 32;
++
++/*
++ * Stored at beginning of each compressed object.
++ *
++ * It stores back-reference to table entry which points to this
++ * object. This is required to support memory defragmentation or
++ * migrating compressed pages to backing swap disk.
++ */
++struct zobj_header {
++#if 0
++	u32 table_idx;
++#endif
++};
++
++/*-- Configurable parameters */
++
++/* Default ramzswap disk size: 25% of total RAM */
++static const unsigned default_disksize_perc_ram = 25;
++static const unsigned default_memlimit_perc_ram = 15;
++
++/*
++ * Max compressed page size when backing device is provided.
++ * Pages that compress to size greater than this are sent to
++ * physical swap disk.
++ */
++static const unsigned max_zpage_size_bdev = PAGE_SIZE / 2;
++
++/*
++ * Max compressed page size when there is no backing dev.
++ * Pages that compress to size greater than this are stored
++ * uncompressed in memory.
++ */
++static const unsigned max_zpage_size_nobdev = PAGE_SIZE / 4 * 3;
++
++/*
++ * NOTE: max_zpage_size_{bdev,nobdev} sizes must be
++ * less than or equal to:
++ *   XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
++ * since otherwise xv_malloc would always return failure.
++ */
++
++/*-- End of configurable params */
++
++#define SECTOR_SHIFT		9
++#define SECTOR_SIZE		(1 << SECTOR_SHIFT)
++#define SECTORS_PER_PAGE_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
++#define SECTORS_PER_PAGE	(1 << SECTORS_PER_PAGE_SHIFT)
++
++/* Debugging and Stats */
++#if defined(CONFIG_RAMZSWAP_STATS)
++#define stat_inc(stat)	((stat)++)
++#define stat_dec(stat)	((stat)--)
++#else
++#define stat_inc(x)
++#define stat_dec(x)
++#endif
++
++/* Flags for ramzswap pages (table[page_no].flags) */
++enum rzs_pageflags {
++	/* Page is stored uncompressed */
++	RZS_UNCOMPRESSED,
++
++	/* Page consists entirely of zeros */
++	RZS_ZERO,
++
++	__NR_RZS_PAGEFLAGS,
++};
++
++/*-- Data structures */
++
++/*
++ * Allocated for each swap slot, indexed by page no.
++ * These table entries must fit exactly in a page.
++ */
++struct table {
++	struct page *page;
++	u16 offset;
++	u8 count;	/* object ref count (not yet used) */
++	u8 flags;
++} __attribute__((aligned(4)));;
++
++/*
++ * Swap extent information in case backing swap is a regular
++ * file. These extent entries must fit exactly in a page.
++ */
++struct ramzswap_backing_extent {
++	pgoff_t phy_pagenum;
++	pgoff_t num_pages;
++} __attribute__((aligned(4)));
++
++struct ramzswap_stats {
++	/* basic stats */
++	size_t compr_size;	/* compressed size of pages stored -
++				 * needed to enforce memlimit */
++	/* more stats */
++#if defined(CONFIG_RAMZSWAP_STATS)
++	u64 num_reads;		/* failed + successful */
++	u64 num_writes;		/* --do-- */
++	u64 failed_reads;	/* can happen when memory is too low */
++	u64 failed_writes;	/* should NEVER! happen */
++	u64 invalid_io;		/* non-swap I/O requests */
++	u32 pages_zero;		/* no. of zero filled pages */
++	u32 pages_stored;	/* no. of pages currently stored */
++	u32 good_compress;	/* no. of pages with compression ratio<=50% */
++	u32 pages_expand;	/* no. of incompressible pages */
++	u64 bdev_num_reads;	/* no. of reads on backing dev */
++	u64 bdev_num_writes;	/* no. of writes on backing dev */
++#endif
++};
++
++struct ramzswap {
++	struct xv_pool *mem_pool;
++	void *compress_workmem;
++	void *compress_buffer;
++	struct table *table;
++	struct mutex lock;
++	struct request_queue *queue;
++	struct gendisk *disk;
++	int init_done;
++	/*
++	 * This is the limit on compressed data size (stats.compr_size).
++	 * It is applicable only when a backing swap device is present.
++	 */
++	size_t memlimit;	/* bytes */
++	/*
++	 * This is the limit on the amount of *uncompressed* data
++	 * we can hold. When a backing swap device is provided, it is
++	 * set equal to the device size.
++	 */
++	size_t disksize;	/* bytes */
++
++	struct ramzswap_stats stats;
++
++	/* backing swap device info */
++	struct ramzswap_backing_extent *curr_extent;
++	struct list_head backing_swap_extent_list;
++	unsigned long num_extents;
++	char backing_swap_name[MAX_SWAP_NAME_LEN];
++	struct block_device *backing_swap;
++	struct file *swap_file;
++};
++
++/*-- */
++
++#endif
++
+diff --git a/drivers/staging/ramzswap/ramzswap_ioctl.h b/drivers/staging/ramzswap/ramzswap_ioctl.h
+new file mode 100644
+index 0000000..c713a09
+--- /dev/null
++++ b/drivers/staging/ramzswap/ramzswap_ioctl.h
+@@ -0,0 +1,49 @@
++/*
++ * Compressed RAM based swap device
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ *
++ * Project home: http://compcache.googlecode.com
++ */
++
++#ifndef _RAMZSWAP_IOCTL_H_
++#define _RAMZSWAP_IOCTL_H_
++
++#define MAX_SWAP_NAME_LEN 128
++
++struct ramzswap_ioctl_stats {
++	char backing_swap_name[MAX_SWAP_NAME_LEN];
++	u64 memlimit;		/* only applicable if backing swap present */
++	u64 disksize;		/* user specified or equal to backing swap
++				 * size (if present) */
++	u64 num_reads;		/* failed + successful */
++	u64 num_writes;		/* --do-- */
++	u64 failed_reads;	/* can happen when memory is too low */
++	u64 failed_writes;	/* should NEVER! happen */
++	u64 invalid_io;		/* non-swap I/O requests */
++	u32 pages_zero;		/* no. of zero filled pages */
++	u32 good_compress_pct;	/* % of pages with compression ratio<=50% */
++	u32 pages_expand_pct;	/* % of incompressible pages */
++	u32 pages_stored;
++	u32 pages_used;
++	u64 orig_data_size;
++	u64 compr_data_size;
++	u64 mem_used_total;
++	u64 bdev_num_reads;	/* no. of reads on backing dev */
++	u64 bdev_num_writes;	/* no. of writes on backing dev */
++} __attribute__ ((packed, aligned(4)));
++
++#define RZSIO_SET_DISKSIZE_KB	_IOW('z', 0, size_t)
++#define RZSIO_SET_MEMLIMIT_KB	_IOW('z', 1, size_t)
++#define RZSIO_SET_BACKING_SWAP	_IOW('z', 2, unsigned char[MAX_SWAP_NAME_LEN])
++#define RZSIO_GET_STATS		_IOR('z', 3, struct ramzswap_ioctl_stats)
++#define RZSIO_INIT		_IO('z', 4)
++#define RZSIO_RESET		_IO('z', 5)
++
++#endif
+-- 
+1.7.0.3
+

Added: dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-documentation.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-documentation.patch	Sun Apr  4 19:07:12 2010	(r15483)
@@ -0,0 +1,74 @@
+From 47f9afb38f0de2f153deea34bf1ef5c778815f2e Mon Sep 17 00:00:00 2001
+From: Nitin Gupta <ngupta at vflare.org>
+Date: Tue, 22 Sep 2009 10:26:54 +0530
+Subject: [PATCH 3/5] Staging: ramzswap: documentation
+
+Short guide on how to setup and use ramzswap.
+
+Signed-off-by: Nitin Gupta <ngupta at vflare.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ drivers/staging/ramzswap/ramzswap.txt |   51 +++++++++++++++++++++++++++++++++
+ 1 files changed, 51 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/staging/ramzswap/ramzswap.txt
+
+diff --git a/drivers/staging/ramzswap/ramzswap.txt b/drivers/staging/ramzswap/ramzswap.txt
+new file mode 100644
+index 0000000..e9f1619
+--- /dev/null
++++ b/drivers/staging/ramzswap/ramzswap.txt
+@@ -0,0 +1,51 @@
++ramzswap: Compressed RAM based swap device
++-------------------------------------------
++
++Project home: http://compcache.googlecode.com/
++
++* Introduction
++
++It creates RAM based block devices which can be used (only) as swap disks.
++Pages swapped to these devices are compressed and stored in memory itself.
++See project home for use cases, performance numbers and a lot more.
++
++Individual ramzswap devices are configured and initialized using the rzscontrol
++userspace utility as shown in the examples below. See the rzscontrol man page
++for more details.
++
++* Usage
++
++Following shows a typical sequence of steps for using ramzswap.
++
++1) Load Modules:
++	modprobe ramzswap num_devices=4
++	This creates 4 (uninitialized) devices: /dev/ramzswap{0,1,2,3}
++	(num_devices parameter is optional. Default: 1)
++
++2) Initialize:
++	Use rzscontrol utility to configure and initialize individual
++	ramzswap devices. Example:
++	rzscontrol /dev/ramzswap2 --init # uses default value of disksize_kb
++
++	*See rzscontrol man page for more details and examples*
++
++3) Activate:
++	swapon /dev/ramzswap2 # or any other initialized ramzswap device
++
++4) Stats:
++	rzscontrol /dev/ramzswap2 --stats
++
++5) Deactivate:
++	swapoff /dev/ramzswap2
++
++6) Reset:
++	rzscontrol /dev/ramzswap2 --reset
++	(This frees all the memory allocated for this device).
++
++
++Please report any problems at:
++ - Mailing list: linux-mm-cc at laptop dot org
++ - Issue tracker: http://code.google.com/p/compcache/issues/list
++
++Nitin Gupta
++ngupta at vflare.org
+-- 
+1.7.0.3
+

Added: dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-remove-ARM-specific-d-cache-hack.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/features/all/ramzswap/ramzswap-remove-ARM-specific-d-cache-hack.patch	Sun Apr  4 19:07:12 2010	(r15483)
@@ -0,0 +1,94 @@
+From 30fb8a7141e906116bb536dd54be99480b8fd238 Mon Sep 17 00:00:00 2001
+From: Nitin Gupta <ngupta at vflare.org>
+Date: Sat, 12 Dec 2009 11:44:46 +0530
+Subject: [PATCH 5/5] Staging: ramzswap: remove ARM specific d-cache hack
+
+Remove d-cache hack in ramzswap driver that was needed
+to workaround a bug in ARM version of update_mmu_cache()
+which caused stale data in d-cache to be transferred to
+userspace. This bug was fixed by git commit:
+	787b2faadc4356b6c2c71feb42fb944fece9a12f
+This also brings down one entry in TODO file.
+
+Signed-off-by: Nitin Gupta <ngupta at vflare.org>
+Acked-by: Pekka Enberg <penberg at cs.helsinki.fi>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ drivers/staging/ramzswap/TODO           |    1 -
+ drivers/staging/ramzswap/ramzswap_drv.c |   28 +++-------------------------
+ 2 files changed, 3 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/staging/ramzswap/TODO b/drivers/staging/ramzswap/TODO
+index bac40d6..8d64e28 100644
+--- a/drivers/staging/ramzswap/TODO
++++ b/drivers/staging/ramzswap/TODO
+@@ -1,6 +1,5 @@
+ TODO:
+ 	- Add support for swap notifiers
+-	- Remove CONFIG_ARM hack
+ 
+ Please send patches to Greg Kroah-Hartman <greg at kroah.com> and
+ Nitin Gupta <ngupta at vflare.org>
+diff --git a/drivers/staging/ramzswap/ramzswap_drv.c b/drivers/staging/ramzswap/ramzswap_drv.c
+index b839f05..989fac5 100644
+--- a/drivers/staging/ramzswap/ramzswap_drv.c
++++ b/drivers/staging/ramzswap/ramzswap_drv.c
+@@ -222,28 +222,6 @@ out:
+ 	return ret;
+ }
+ 
+-static void ramzswap_flush_dcache_page(struct page *page)
+-{
+-#ifdef CONFIG_ARM
+-	int flag = 0;
+-	/*
+-	 * Ugly hack to get flush_dcache_page() work on ARM.
+-	 * page_mapping(page) == NULL after clearing this swap cache flag.
+-	 * Without clearing this flag, flush_dcache_page() will simply set
+-	 * "PG_dcache_dirty" bit and return.
+-	 */
+-	if (PageSwapCache(page)) {
+-		flag = 1;
+-		ClearPageSwapCache(page);
+-	}
+-#endif
+-	flush_dcache_page(page);
+-#ifdef CONFIG_ARM
+-	if (flag)
+-		SetPageSwapCache(page);
+-#endif
+-}
+-
+ void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
+ 			struct ramzswap_ioctl_stats *s)
+ {
+@@ -655,7 +633,7 @@ static int handle_zero_page(struct bio *bio)
+ 	memset(user_mem, 0, PAGE_SIZE);
+ 	kunmap_atomic(user_mem, KM_USER0);
+ 
+-	ramzswap_flush_dcache_page(page);
++	flush_dcache_page(page);
+ 
+ 	set_bit(BIO_UPTODATE, &bio->bi_flags);
+ 	bio_endio(bio, 0);
+@@ -679,7 +657,7 @@ static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
+ 	kunmap_atomic(user_mem, KM_USER0);
+ 	kunmap_atomic(cmem, KM_USER1);
+ 
+-	ramzswap_flush_dcache_page(page);
++	flush_dcache_page(page);
+ 
+ 	set_bit(BIO_UPTODATE, &bio->bi_flags);
+ 	bio_endio(bio, 0);
+@@ -779,7 +757,7 @@ static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
+ 		goto out;
+ 	}
+ 
+-	ramzswap_flush_dcache_page(page);
++	flush_dcache_page(page);
+ 
+ 	set_bit(BIO_UPTODATE, &bio->bi_flags);
+ 	bio_endio(bio, 0);
+-- 
+1.7.0.3
+

Added: dists/sid/linux-2.6/debian/patches/features/all/ramzswap/xvmalloc-memory-allocator.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/features/all/ramzswap/xvmalloc-memory-allocator.patch	Sun Apr  4 19:07:12 2010	(r15483)
@@ -0,0 +1,700 @@
+From 644bf7b5983cf2540b57a5b25b775cb3c1e8e943 Mon Sep 17 00:00:00 2001
+From: Nitin Gupta <ngupta at vflare.org>
+Date: Tue, 22 Sep 2009 10:26:52 +0530
+Subject: [PATCH 1/5] Staging: xvmalloc memory allocator
+
+* Features:
+ - Low metadata overhead (just 4 bytes per object)
+ - O(1) Alloc/Free - except when we have to call system page allocator to
+   get additional memory.
+ - Very low fragmentation: In all tests, xvmalloc memory usage is within 12%
+   of "Ideal".
+ - Pool based allocator: Each pool can grow and shrink.
+ - It maps pages only when required. So, it does not hog vmalloc area which
+   is very small on 32-bit systems.
+
+SLUB allocator could not be used due to fragmentation issues:
+http://code.google.com/p/compcache/wiki/AllocatorsComparison
+Data here shows kmalloc using ~43% more memory than TLSF, and xvMalloc
+showed ~2% more space efficiency than TLSF (due to smaller metadata).
+Creating various kmem_caches can reduce the space efficiency gap, but the
+problem of being limited to low memory still exists. Also, it depends on
+allocating higher order pages to reduce fragmentation - this is not
+acceptable for ramzswap as it is used under memory crunch (it's a swap
+device!).
+
+SLOB allocator could not be used due to reasons mentioned here:
+http://lkml.org/lkml/2009/3/18/210
+
+* Implementation:
+It uses a two-level bitmap search to find the free list containing a block
+of the correct size. This idea is taken from the TLSF (Two-Level Segregate
+Fit) allocator and is well explained in its paper (see [Links] below).
+
+* Limitations:
+ - Poor scalability: No per-cpu data structures (work in progress).
+
+[Links]
+1. Details and Performance data:
+http://code.google.com/p/compcache/wiki/xvMalloc
+http://code.google.com/p/compcache/wiki/xvMallocPerformance
+
+2. TLSF memory allocator:
+home: http://rtportal.upv.es/rtmalloc/
+paper: http://rtportal.upv.es/rtmalloc/files/MRBC_2008.pdf
+
+Signed-off-by: Nitin Gupta <ngupta at vflare.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
+---
+ drivers/staging/ramzswap/xvmalloc.c     |  507 +++++++++++++++++++++++++++++++
+ drivers/staging/ramzswap/xvmalloc.h     |   30 ++
+ drivers/staging/ramzswap/xvmalloc_int.h |   86 ++++++
+ 3 files changed, 623 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/staging/ramzswap/xvmalloc.c
+ create mode 100644 drivers/staging/ramzswap/xvmalloc.h
+ create mode 100644 drivers/staging/ramzswap/xvmalloc_int.h
+
+diff --git a/drivers/staging/ramzswap/xvmalloc.c b/drivers/staging/ramzswap/xvmalloc.c
+new file mode 100644
+index 0000000..b3e986c
+--- /dev/null
++++ b/drivers/staging/ramzswap/xvmalloc.c
+@@ -0,0 +1,507 @@
++/*
++ * xvmalloc memory allocator
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ */
++
++#include <linux/bitops.h>
++#include <linux/errno.h>
++#include <linux/highmem.h>
++#include <linux/init.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++
++#include "xvmalloc.h"
++#include "xvmalloc_int.h"
++
++static void stat_inc(u64 *value)
++{
++	*value = *value + 1;
++}
++
++static void stat_dec(u64 *value)
++{
++	*value = *value - 1;
++}
++
++static int test_flag(struct block_header *block, enum blockflags flag)
++{
++	return block->prev & BIT(flag);
++}
++
++static void set_flag(struct block_header *block, enum blockflags flag)
++{
++	block->prev |= BIT(flag);
++}
++
++static void clear_flag(struct block_header *block, enum blockflags flag)
++{
++	block->prev &= ~BIT(flag);
++}
++
++/*
++ * Given <page, offset> pair, provide a dereferenceable pointer.
++ * This is called from xv_malloc/xv_free path, so it
++ * needs to be fast.
++ */
++static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
++{
++	unsigned char *base;
++
++	base = kmap_atomic(page, type);
++	return base + offset;
++}
++
++static void put_ptr_atomic(void *ptr, enum km_type type)
++{
++	kunmap_atomic(ptr, type);
++}
++
++static u32 get_blockprev(struct block_header *block)
++{
++	return block->prev & PREV_MASK;
++}
++
++static void set_blockprev(struct block_header *block, u16 new_offset)
++{
++	block->prev = new_offset | (block->prev & FLAGS_MASK);
++}
++
++static struct block_header *BLOCK_NEXT(struct block_header *block)
++{
++	return (struct block_header *)
++		((char *)block + block->size + XV_ALIGN);
++}
++
++/*
++ * Get index of free list containing blocks of maximum size
++ * which is less than or equal to given size.
++ */
++static u32 get_index_for_insert(u32 size)
++{
++	if (unlikely(size > XV_MAX_ALLOC_SIZE))
++		size = XV_MAX_ALLOC_SIZE;
++	size &= ~FL_DELTA_MASK;
++	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
++}
++
++/*
++ * Get index of free list having blocks of size greater than
++ * or equal to requested size.
++ */
++static u32 get_index(u32 size)
++{
++	if (unlikely(size < XV_MIN_ALLOC_SIZE))
++		size = XV_MIN_ALLOC_SIZE;
++	size = ALIGN(size, FL_DELTA);
++	return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
++}
++
++/**
++ * find_block - find block of at least given size
++ * @pool: memory pool to search from
++ * @size: size of block required
++ * @page: page containing required block
++ * @offset: offset within the page where block is located.
++ *
++ * Searches two level bitmap to locate block of at least
++ * the given size. If such a block is found, it provides
++ * <page, offset> to identify this block and returns index
++ * in freelist where we found this block.
++ * Otherwise, returns 0 and <page, offset> params are not touched.
++ */
++static u32 find_block(struct xv_pool *pool, u32 size,
++			struct page **page, u32 *offset)
++{
++	ulong flbitmap, slbitmap;
++	u32 flindex, slindex, slbitstart;
++
++	/* There are no free blocks in this pool */
++	if (!pool->flbitmap)
++		return 0;
++
++	/* Get freelist index corresponding to this size */
++	slindex = get_index(size);
++	slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
++	slbitstart = slindex % BITS_PER_LONG;
++
++	/*
++	 * If freelist is not empty at this index, we found the
++	 * block - head of this list. This is approximate best-fit match.
++	 */
++	if (test_bit(slbitstart, &slbitmap)) {
++		*page = pool->freelist[slindex].page;
++		*offset = pool->freelist[slindex].offset;
++		return slindex;
++	}
++
++	/*
++	 * No best-fit found. Search a bit further in bitmap for a free block.
++	 * Second level bitmap consists of a series of BITS_PER_LONG-bit
++	 * chunks. Search further in the chunk where we expected a best-fit,
++	 * starting from index location found above.
++	 */
++	slbitstart++;
++	slbitmap >>= slbitstart;
++
++	/* Skip this search if we were already at end of this bitmap chunk */
++	if ((slbitstart != BITS_PER_LONG) && slbitmap) {
++		slindex += __ffs(slbitmap) + 1;
++		*page = pool->freelist[slindex].page;
++		*offset = pool->freelist[slindex].offset;
++		return slindex;
++	}
++
++	/* Now do a full two-level bitmap search to find next nearest fit */
++	flindex = slindex / BITS_PER_LONG;
++
++	flbitmap = (pool->flbitmap) >> (flindex + 1);
++	if (!flbitmap)
++		return 0;
++
++	flindex += __ffs(flbitmap) + 1;
++	slbitmap = pool->slbitmap[flindex];
++	slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
++	*page = pool->freelist[slindex].page;
++	*offset = pool->freelist[slindex].offset;
++
++	return slindex;
++}
++
++/*
++ * Insert block at <page, offset> in freelist of given pool.
++ * freelist used depends on block size.
++ */
++static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
++			struct block_header *block)
++{
++	u32 flindex, slindex;
++	struct block_header *nextblock;
++
++	slindex = get_index_for_insert(block->size);
++	flindex = slindex / BITS_PER_LONG;
++
++	block->link.prev_page = 0;
++	block->link.prev_offset = 0;
++	block->link.next_page = pool->freelist[slindex].page;
++	block->link.next_offset = pool->freelist[slindex].offset;
++	pool->freelist[slindex].page = page;
++	pool->freelist[slindex].offset = offset;
++
++	if (block->link.next_page) {
++		nextblock = get_ptr_atomic(block->link.next_page,
++					block->link.next_offset, KM_USER1);
++		nextblock->link.prev_page = page;
++		nextblock->link.prev_offset = offset;
++		put_ptr_atomic(nextblock, KM_USER1);
++	}
++
++	__set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
++	__set_bit(flindex, &pool->flbitmap);
++}
++
++/*
++ * Remove block from head of freelist. Index 'slindex' identifies the freelist.
++ */
++static void remove_block_head(struct xv_pool *pool,
++			struct block_header *block, u32 slindex)
++{
++	struct block_header *tmpblock;
++	u32 flindex = slindex / BITS_PER_LONG;
++
++	pool->freelist[slindex].page = block->link.next_page;
++	pool->freelist[slindex].offset = block->link.next_offset;
++	block->link.prev_page = 0;
++	block->link.prev_offset = 0;
++
++	if (!pool->freelist[slindex].page) {
++		__clear_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
++		if (!pool->slbitmap[flindex])
++			__clear_bit(flindex, &pool->flbitmap);
++	} else {
++		/*
++		 * DEBUG ONLY: We need not reinitialize freelist head previous
++		 * pointer to 0 - we never depend on its value. But just for
++		 * sanity, let's do it.
++		 */
++		tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
++				pool->freelist[slindex].offset, KM_USER1);
++		tmpblock->link.prev_page = 0;
++		tmpblock->link.prev_offset = 0;
++		put_ptr_atomic(tmpblock, KM_USER1);
++	}
++}
++
++/*
++ * Remove block from freelist. Index 'slindex' identifies the freelist.
++ */
++static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
++			struct block_header *block, u32 slindex)
++{
++	u32 flindex;
++	struct block_header *tmpblock;
++
++	if (pool->freelist[slindex].page == page
++	   && pool->freelist[slindex].offset == offset) {
++		remove_block_head(pool, block, slindex);
++		return;
++	}
++
++	flindex = slindex / BITS_PER_LONG;
++
++	if (block->link.prev_page) {
++		tmpblock = get_ptr_atomic(block->link.prev_page,
++				block->link.prev_offset, KM_USER1);
++		tmpblock->link.next_page = block->link.next_page;
++		tmpblock->link.next_offset = block->link.next_offset;
++		put_ptr_atomic(tmpblock, KM_USER1);
++	}
++
++	if (block->link.next_page) {
++		tmpblock = get_ptr_atomic(block->link.next_page,
++				block->link.next_offset, KM_USER1);
++		tmpblock->link.prev_page = block->link.prev_page;
++		tmpblock->link.prev_offset = block->link.prev_offset;
++		put_ptr_atomic(tmpblock, KM_USER1);
++	}
++}
++
++/*
++ * Allocate a page and add it to the freelist of the given pool.
++ */
++static int grow_pool(struct xv_pool *pool, gfp_t flags)
++{
++	struct page *page;
++	struct block_header *block;
++
++	page = alloc_page(flags);
++	if (unlikely(!page))
++		return -ENOMEM;
++
++	stat_inc(&pool->total_pages);
++
++	spin_lock(&pool->lock);
++	block = get_ptr_atomic(page, 0, KM_USER0);
++
++	block->size = PAGE_SIZE - XV_ALIGN;
++	set_flag(block, BLOCK_FREE);
++	clear_flag(block, PREV_FREE);
++	set_blockprev(block, 0);
++
++	insert_block(pool, page, 0, block);
++
++	put_ptr_atomic(block, KM_USER0);
++	spin_unlock(&pool->lock);
++
++	return 0;
++}
++
++/*
++ * Create a memory pool. Allocates freelist, bitmaps and other
++ * per-pool metadata.
++ */
++struct xv_pool *xv_create_pool(void)
++{
++	u32 ovhd_size;
++	struct xv_pool *pool;
++
++	ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
++	pool = kzalloc(ovhd_size, GFP_KERNEL);
++	if (!pool)
++		return NULL;
++
++	spin_lock_init(&pool->lock);
++
++	return pool;
++}
++
++void xv_destroy_pool(struct xv_pool *pool)
++{
++	kfree(pool);
++}
++
++/**
++ * xv_malloc - Allocate block of given size from pool.
++ * @pool: pool to allocate from
++ * @size: size of block to allocate
++ * @page: page no. that holds the object
++ * @offset: location of object within page
++ *
++ * On success, <page, offset> identifies block allocated
++ * and 0 is returned. On failure, <page, offset> is set to
++ * 0 and -ENOMEM is returned.
++ *
++ * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
++ */
++int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
++		u32 *offset, gfp_t flags)
++{
++	int error;
++	u32 index, tmpsize, origsize, tmpoffset;
++	struct block_header *block, *tmpblock;
++
++	*page = NULL;
++	*offset = 0;
++	origsize = size;
++
++	if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
++		return -ENOMEM;
++
++	size = ALIGN(size, XV_ALIGN);
++
++	spin_lock(&pool->lock);
++
++	index = find_block(pool, size, page, offset);
++
++	if (!*page) {
++		spin_unlock(&pool->lock);
++		if (flags & GFP_NOWAIT)
++			return -ENOMEM;
++		error = grow_pool(pool, flags);
++		if (unlikely(error))
++			return error;
++
++		spin_lock(&pool->lock);
++		index = find_block(pool, size, page, offset);
++	}
++
++	if (!*page) {
++		spin_unlock(&pool->lock);
++		return -ENOMEM;
++	}
++
++	block = get_ptr_atomic(*page, *offset, KM_USER0);
++
++	remove_block_head(pool, block, index);
++
++	/* Split the block if required */
++	tmpoffset = *offset + size + XV_ALIGN;
++	tmpsize = block->size - size;
++	tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
++	if (tmpsize) {
++		tmpblock->size = tmpsize - XV_ALIGN;
++		set_flag(tmpblock, BLOCK_FREE);
++		clear_flag(tmpblock, PREV_FREE);
++
++		set_blockprev(tmpblock, *offset);
++		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
++			insert_block(pool, *page, tmpoffset, tmpblock);
++
++		if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
++			tmpblock = BLOCK_NEXT(tmpblock);
++			set_blockprev(tmpblock, tmpoffset);
++		}
++	} else {
++		/* This block is exact fit */
++		if (tmpoffset != PAGE_SIZE)
++			clear_flag(tmpblock, PREV_FREE);
++	}
++
++	block->size = origsize;
++	clear_flag(block, BLOCK_FREE);
++
++	put_ptr_atomic(block, KM_USER0);
++	spin_unlock(&pool->lock);
++
++	*offset += XV_ALIGN;
++
++	return 0;
++}
++
++/*
++ * Free block identified with <page, offset>
++ */
++void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
++{
++	void *page_start;
++	struct block_header *block, *tmpblock;
++
++	offset -= XV_ALIGN;
++
++	spin_lock(&pool->lock);
++
++	page_start = get_ptr_atomic(page, 0, KM_USER0);
++	block = (struct block_header *)((char *)page_start + offset);
++
++	/* Catch double free bugs */
++	BUG_ON(test_flag(block, BLOCK_FREE));
++
++	block->size = ALIGN(block->size, XV_ALIGN);
++
++	tmpblock = BLOCK_NEXT(block);
++	if (offset + block->size + XV_ALIGN == PAGE_SIZE)
++		tmpblock = NULL;
++
++	/* Merge next block if it's free */
++	if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
++		/*
++		 * Blocks smaller than XV_MIN_ALLOC_SIZE
++		 * are not inserted in any free list.
++		 */
++		if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
++			remove_block(pool, page,
++				    offset + block->size + XV_ALIGN, tmpblock,
++				    get_index_for_insert(tmpblock->size));
++		}
++		block->size += tmpblock->size + XV_ALIGN;
++	}
++
++	/* Merge previous block if it's free */
++	if (test_flag(block, PREV_FREE)) {
++		tmpblock = (struct block_header *)((char *)(page_start) +
++						get_blockprev(block));
++		offset = offset - tmpblock->size - XV_ALIGN;
++
++		if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
++			remove_block(pool, page, offset, tmpblock,
++				    get_index_for_insert(tmpblock->size));
++
++		tmpblock->size += block->size + XV_ALIGN;
++		block = tmpblock;
++	}
++
++	/* No used objects in this page. Free it. */
++	if (block->size == PAGE_SIZE - XV_ALIGN) {
++		put_ptr_atomic(page_start, KM_USER0);
++		spin_unlock(&pool->lock);
++
++		__free_page(page);
++		stat_dec(&pool->total_pages);
++		return;
++	}
++
++	set_flag(block, BLOCK_FREE);
++	if (block->size >= XV_MIN_ALLOC_SIZE)
++		insert_block(pool, page, offset, block);
++
++	if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
++		tmpblock = BLOCK_NEXT(block);
++		set_flag(tmpblock, PREV_FREE);
++		set_blockprev(tmpblock, offset);
++	}
++
++	put_ptr_atomic(page_start, KM_USER0);
++	spin_unlock(&pool->lock);
++}
++
++u32 xv_get_object_size(void *obj)
++{
++	struct block_header *blk;
++
++	blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
++	return blk->size;
++}
++
++/*
++ * Returns total memory used by allocator (userdata + metadata)
++ */
++u64 xv_get_total_size_bytes(struct xv_pool *pool)
++{
++	return pool->total_pages << PAGE_SHIFT;
++}
+diff --git a/drivers/staging/ramzswap/xvmalloc.h b/drivers/staging/ramzswap/xvmalloc.h
+new file mode 100644
+index 0000000..010c6fe
+--- /dev/null
++++ b/drivers/staging/ramzswap/xvmalloc.h
+@@ -0,0 +1,30 @@
++/*
++ * xvmalloc memory allocator
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ */
++
++#ifndef _XV_MALLOC_H_
++#define _XV_MALLOC_H_
++
++#include <linux/types.h>
++
++struct xv_pool;
++
++struct xv_pool *xv_create_pool(void);
++void xv_destroy_pool(struct xv_pool *pool);
++
++int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
++			u32 *offset, gfp_t flags);
++void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
++
++u32 xv_get_object_size(void *obj);
++u64 xv_get_total_size_bytes(struct xv_pool *pool);
++
++#endif
+diff --git a/drivers/staging/ramzswap/xvmalloc_int.h b/drivers/staging/ramzswap/xvmalloc_int.h
+new file mode 100644
+index 0000000..03c1a65
+--- /dev/null
++++ b/drivers/staging/ramzswap/xvmalloc_int.h
+@@ -0,0 +1,86 @@
++/*
++ * xvmalloc memory allocator
++ *
++ * Copyright (C) 2008, 2009  Nitin Gupta
++ *
++ * This code is released using a dual license strategy: BSD/GPL
++ * You can choose the licence that better fits your requirements.
++ *
++ * Released under the terms of 3-clause BSD License
++ * Released under the terms of GNU General Public License Version 2.0
++ */
++
++#ifndef _XV_MALLOC_INT_H_
++#define _XV_MALLOC_INT_H_
++
++#include <linux/kernel.h>
++#include <linux/types.h>
++
++/* User configurable params */
++
++/* Must be power of two */
++#define XV_ALIGN_SHIFT	2
++#define XV_ALIGN	(1 << XV_ALIGN_SHIFT)
++#define XV_ALIGN_MASK	(XV_ALIGN - 1)
++
++/* This must be greater than sizeof(link_free) */
++#define XV_MIN_ALLOC_SIZE	32
++#define XV_MAX_ALLOC_SIZE	(PAGE_SIZE - XV_ALIGN)
++
++/* Free lists are separated by FL_DELTA bytes */
++#define FL_DELTA_SHIFT	3
++#define FL_DELTA	(1 << FL_DELTA_SHIFT)
++#define FL_DELTA_MASK	(FL_DELTA - 1)
++#define NUM_FREE_LISTS	((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
++				/ FL_DELTA + 1)
++
++#define MAX_FLI		DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
++
++/* End of user params */
++
++enum blockflags {
++	BLOCK_FREE,
++	PREV_FREE,
++	__NR_BLOCKFLAGS,
++};
++
++#define FLAGS_MASK	XV_ALIGN_MASK
++#define PREV_MASK	(~FLAGS_MASK)
++
++struct freelist_entry {
++	struct page *page;
++	u16 offset;
++	u16 pad;
++};
++
++struct link_free {
++	struct page *prev_page;
++	struct page *next_page;
++	u16 prev_offset;
++	u16 next_offset;
++};
++
++struct block_header {
++	union {
++		/* This common header must be ALIGN bytes */
++		u8 common[XV_ALIGN];
++		struct {
++			u16 size;
++			u16 prev;
++		};
++	};
++	struct link_free link;
++};
++
++struct xv_pool {
++	ulong flbitmap;
++	ulong slbitmap[MAX_FLI];
++	spinlock_t lock;
++
++	struct freelist_entry freelist[NUM_FREE_LISTS];
++
++	/* stats */
++	u64 total_pages;
++};
++
++#endif
+-- 
+1.7.0.3
+

Modified: dists/sid/linux-2.6/debian/patches/series/11
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/11	Sun Apr  4 08:36:36 2010	(r15482)
+++ dists/sid/linux-2.6/debian/patches/series/11	Sun Apr  4 19:07:12 2010	(r15483)
@@ -19,3 +19,8 @@
 - bugfix/all/hrtimer-tune-hrtimer_interrupt-hang-logic.patch
 + features/all/phylib-Support-phy-module-autoloading.patch
 + features/all/phylib-Add-module-table-to-all-existing-phy-drivers.patch
++ features/all/ramzswap/xvmalloc-memory-allocator.patch
++ features/all/ramzswap/ramzswap-add.patch
++ features/all/ramzswap/ramzswap-documentation.patch
++ features/all/ramzswap/ramzswap-add-TODO-file.patch
++ features/all/ramzswap/ramzswap-remove-ARM-specific-d-cache-hack.patch



More information about the Kernel-svn-changes mailing list