ext4_utils: add filesystem capabilities support.

Add support for specifying filesystem capabilities when
creating a new filesystem.

The combination of SELinux extended attributes plus
filesystem capability extended attributes is too big
to fit inside one inode entry. Because of this, I added
support to ext4_utils to create an xattr block and link
the inode to that block. We continue to try to fit
everything inside the inode if possible, but fall back to
creating a block if the extended attribute is too big.

Change-Id: I40ebb63975b15ecd8c565486e171b4d50cd4dfaa
diff --git a/ext4_utils/allocate.c b/ext4_utils/allocate.c
index 3229abe..5c60e92 100644
--- a/ext4_utils/allocate.c
+++ b/ext4_utils/allocate.c
@@ -60,6 +60,12 @@
 	u16 used_dirs;
 };
 
+struct xattr_list_element {
+	struct ext4_inode *inode;	/* lookup key: the inode that owns the block */
+	struct ext4_xattr_header *header;	/* start of that inode's xattr block buffer */
+	struct xattr_list_element *next;	/* next node; the list head is aux_info.xattrs */
+};
+
 struct block_allocation *create_allocation()
 {
 	struct block_allocation *alloc = malloc(sizeof(struct block_allocation));
@@ -74,6 +80,25 @@
 	return alloc;
 }
 
+static struct ext4_xattr_header *xattr_list_find(struct ext4_inode *inode)
+{
+	/* Linear scan of the inode -> xattr block list rooted at aux_info.xattrs. */
+	struct xattr_list_element *node = aux_info.xattrs;
+	while (node != NULL && node->inode != inode)
+		node = node->next;
+	/* NULL means no block has been recorded for this inode. */
+	return node != NULL ? node->header : NULL;
+}
+
+static void xattr_list_insert(struct ext4_inode *inode, struct ext4_xattr_header *header)
+{
+	struct xattr_list_element *element = malloc(sizeof(*element));
+	if (element == NULL)	/* was dereferenced unchecked; fail loudly instead */
+		critical_error_errno("malloc");
+	*element = (struct xattr_list_element) { .inode = inode, .header = header, .next = aux_info.xattrs };
+	aux_info.xattrs = element;	/* records inode -> header at the head of the list */
+}
+
 static void region_list_remove(struct region_list *list, struct region *reg)
 {
 	if (reg->prev)
@@ -673,6 +698,35 @@
 		info.inode_size);
 }
 
+struct ext4_xattr_header *get_xattr_block_for_inode(struct ext4_inode *inode)
+{
+	/* Returns the inode's xattr block, creating it on first use; NULL on failure. */
+	struct ext4_xattr_header *block = xattr_list_find(inode);
+	if (block != NULL)
+		return block;
+
+	block = calloc(info.block_size, 1);
+	if (block == NULL) {
+		error("get_xattr: failed to allocate %d", info.block_size);
+		return NULL;
+	}
+	block->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+	block->h_refcount = cpu_to_le32(1);
+	block->h_blocks = cpu_to_le32(1);
+	u32 block_num = allocate_block();	/* reserved only once calloc() has succeeded */
+	int result = sparse_file_add_data(info.sparse_file, block, info.block_size, block_num);
+	if (result != 0) {
+		error("get_xattr: sparse_file_add_data failure %d", result);
+		free(block);
+		return NULL;
+	}
+	/* Link the inode last so a failure above cannot leave it pointing at an unwritten block. */
+	inode->i_blocks_lo = cpu_to_le32(le32_to_cpu(inode->i_blocks_lo) + (info.block_size / 512));
+	inode->i_file_acl_lo = cpu_to_le32(block_num);
+	xattr_list_insert(inode, block);
+	return block;
+}
+
 /* Mark the first len inodes in a block group as used */
 u32 reserve_inodes(int bg, u32 num)
 {
diff --git a/ext4_utils/allocate.h b/ext4_utils/allocate.h
index 0575e84..7a3ffed 100644
--- a/ext4_utils/allocate.h
+++ b/ext4_utils/allocate.h
@@ -21,6 +21,7 @@
 
 #include "ext4_utils.h"
 #include "ext4.h"
+#include "xattr.h"
 
 struct block_allocation;
 
@@ -31,6 +32,7 @@
 int block_allocation_num_regions(struct block_allocation *alloc);
 int block_allocation_len(struct block_allocation *alloc);
 struct ext4_inode *get_inode(u32 inode);
+struct ext4_xattr_header *get_xattr_block_for_inode(struct ext4_inode *inode);
 void reduce_allocation(struct block_allocation *alloc, u32 len);
 u32 get_block(struct block_allocation *alloc, u32 block);
 u32 get_oob_block(struct block_allocation *alloc, u32 block);
diff --git a/ext4_utils/contents.c b/ext4_utils/contents.c
index 4d45f67..13e0510 100644
--- a/ext4_utils/contents.c
+++ b/ext4_utils/contents.c
@@ -17,6 +17,8 @@
 #include <sys/stat.h>
 #include <string.h>
 #include <stdio.h>
+#include <linux/capability.h>
+#include <linux/xattr.h>
 
 #include "ext4_utils.h"
 #include "ext4.h"
@@ -242,44 +244,228 @@
 	return 0;
 }
 
-#define XATTR_SELINUX_SUFFIX "selinux"
-
-/* XXX */
-#define cpu_to_le32(x) (x)
-#define cpu_to_le16(x) (x)
-
-int inode_set_selinux(u32 inode_num, const char *secon)
+/*
+ * Returns the number of unused bytes between the terminating entry of the
+ * list starting at 'entry' and the values packed below 'end'; 0 on error.
+ */
+static size_t xattr_free_space(struct ext4_xattr_entry *entry, char *end)
 {
-	struct ext4_inode *inode = get_inode(inode_num);
-	u32 *hdr;
-	struct ext4_xattr_entry *entry;
-	size_t name_len = strlen(XATTR_SELINUX_SUFFIX);
-	size_t value_len;
-	size_t size, min_offs;
-	char *val;
+	while ((((char *) entry) < end) && !IS_LAST_ENTRY(entry)) {	/* bounds check before the read */
+		end   -= EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+		entry  = EXT4_XATTR_NEXT(entry);
+	}
 
-	if (!secon)
+	if (((char *) entry) > end) {
+		error("unexpected read beyond end of xattr space");
 		return 0;
+	}
 
-	if (!inode)
+	return end - ((char *) entry);
+}
+
+/*
+ * Walks the entry list starting at 'entry' and returns the terminating
+ * slot, i.e. the position where the next xattr entry would be written.
+ */
+static struct ext4_xattr_entry* xattr_get_last(struct ext4_xattr_entry *entry)
+{
+	struct ext4_xattr_entry *cursor = entry;
+	while (!IS_LAST_ENTRY(cursor))
+		cursor = EXT4_XATTR_NEXT(cursor);
+	return cursor;
+}
+
+/*
+ * Sanity check: reports (via error()) when the entries starting at 'entry'
+ * are out of order or contain the same attribute twice.
+ *
+ * Adjacent entries are compared with the following keys, in order:
+ *
+ * 1) e_name_index, ascending.
+ * 2) e_name_len, ascending, so among equal indexes a shorter name such as
+ *    "selinux" precedes a longer one such as "capability".
+ * 3) memcmp() of the names when index and length both match.
+ *
+ * The ordering is meant to match what ext4_xattr_find_entry() in the Linux
+ * kernel file fs/ext4/xattr.c expects of attributes stored outside the
+ * inode.
+ */
+static void xattr_assert_sane(struct ext4_xattr_entry *entry)
+{
+	struct ext4_xattr_entry *prev = entry;
+
+	while (!IS_LAST_ENTRY(prev)) {
+		struct ext4_xattr_entry *cur = EXT4_XATTR_NEXT(prev);
+		if (IS_LAST_ENTRY(cur))
+			break;
+
+		/* Positive when 'cur' sorts strictly after 'prev'. */
+		int order = cur->e_name_index - prev->e_name_index;
+		if (order == 0)
+			order = cur->e_name_len - prev->e_name_len;
+		if (order == 0)
+			order = memcmp(cur->e_name, prev->e_name, cur->e_name_len);
+
+		if (order < 0) {
+			error("BUG: extended attributes are not sorted\n");
+			break;
+		} else if (order == 0) {
+			error("BUG: duplicate extended attributes detected\n");
+			break;
+		}
+		prev = cur;
+	}
+}
+
+#define NAME_HASH_SHIFT 5	/* bits the running hash is rotated left per name byte */
+#define VALUE_HASH_SHIFT 16	/* bits the running hash is rotated left per value word */
+/* Stores in entry->e_hash a hash of the entry's name and, when present, its value. */
+static void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
+		struct ext4_xattr_entry *entry)
+{
+	__u32 hash = 0;
+	char *name = entry->e_name;
+	int n;
+
+	for (n = 0; n < entry->e_name_len; n++) {
+		hash = (hash << NAME_HASH_SHIFT) ^
+			(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
+			*name++;
+	}
+
+	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+		__le32 *value = (__le32 *)((char *)header +
+			le16_to_cpu(entry->e_value_offs));	/* value offset is taken from the block header */
+		for (n = (le32_to_cpu(entry->e_value_size) +
+			EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {	/* value length in words, rounded up */
+			hash = (hash << VALUE_HASH_SHIFT) ^
+				(hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+				le32_to_cpu(*value++);
+		}
+	}
+	entry->e_hash = cpu_to_le32(hash);
+}
+
+#undef NAME_HASH_SHIFT
+#undef VALUE_HASH_SHIFT
+
+static struct ext4_xattr_entry* xattr_addto_range(
+		void *block_start,
+		void *block_end,
+		struct ext4_xattr_entry *first,
+		int name_index,
+		const char *name,
+		const void *value,
+		size_t value_len)
+{
+	/* Appends name=value after the last entry of 'first'; NULL if it cannot fit. */
+	size_t name_len = strlen(name);
+	if (name_len > 255)
+		return NULL;
+
+	/* Keep 4 zero bytes free after the new entry: they terminate the entry list. */
+	size_t available_size = xattr_free_space(first, block_end);
+	size_t needed_size = EXT4_XATTR_LEN(name_len) + EXT4_XATTR_SIZE(value_len) + sizeof(__u32);
+	if (needed_size > available_size)
+		return NULL;
+
+	struct ext4_xattr_entry *new_entry = xattr_get_last(first);
+	memset(new_entry, 0, EXT4_XATTR_LEN(name_len));
+	new_entry->e_name_len = name_len;
+	new_entry->e_name_index = name_index;
+	memcpy(new_entry->e_name, name, name_len);
+	new_entry->e_value_block = 0;
+	new_entry->e_value_size = cpu_to_le32(value_len);
+
+	/* Values are packed downwards from the end; offsets count from block_start. */
+	char *val = (char *) new_entry + available_size - EXT4_XATTR_SIZE(value_len);
+	size_t e_value_offs = val - (char *) block_start;
+	new_entry->e_value_offs = cpu_to_le16(e_value_offs);
+	memset(val, 0, EXT4_XATTR_SIZE(value_len));
+	memcpy(val, value, value_len);
+
+	xattr_assert_sane(first);
+	return new_entry;
+}
+
+static int xattr_addto_inode(struct ext4_inode *inode, int name_index,
+		const char *name, const void *value, size_t value_len)
+{
+	struct ext4_xattr_ibody_header *hdr = (struct ext4_xattr_ibody_header *) (inode + 1);
+	struct ext4_xattr_entry *first = (struct ext4_xattr_entry *) (hdr + 1);
+	char *block_end = ((char *) inode) + info.inode_size;
+	/* In-inode storage: 'first' is also passed as block_start, so value offsets count from it. */
+	struct ext4_xattr_entry *result =
+		xattr_addto_range(first, block_end, first, name_index, name, value, value_len);
 
+	if (result == NULL)	/* name too long or not enough room left in the inode */
 		return -1;
 
-	hdr = (u32 *) (inode + 1);
-	*hdr = cpu_to_le32(EXT4_XATTR_MAGIC);
-	entry = (struct ext4_xattr_entry *) (hdr+1);
-	memset(entry, 0, EXT4_XATTR_LEN(name_len));
-	entry->e_name_index = EXT4_XATTR_INDEX_SECURITY;
-	entry->e_name_len = name_len;
-	memcpy(entry->e_name, XATTR_SELINUX_SUFFIX, name_len);
-	value_len = strlen(secon)+1;
-	entry->e_value_size = cpu_to_le32(value_len);
-	min_offs = (char *)inode + info.inode_size - (char*) entry;
-	size = EXT4_XATTR_SIZE(value_len);
-	val = (char *)entry + min_offs - size;
-	entry->e_value_offs = cpu_to_le16(min_offs - size);
-	memset(val + size - EXT4_XATTR_PAD, 0, EXT4_XATTR_PAD);
-	memcpy(val, secon, value_len);
+	hdr->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);	/* written only after the entry was stored */
 	inode->i_extra_isize = cpu_to_le16(sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE);
 
 	return 0;
 }
+
+static int xattr_addto_block(struct ext4_inode *inode, int name_index,
+		const char *name, const void *value, size_t value_len)
+{
+	/* Stores the attribute in the inode's separate xattr block; returns 0 or -1. */
+	struct ext4_xattr_header *blk = get_xattr_block_for_inode(inode);
+	if (blk == NULL)
+		return -1;
+	/* Entries start right after the header; the region ends block_size bytes in. */
+	struct ext4_xattr_entry *head = (struct ext4_xattr_entry *) (blk + 1);
+	struct ext4_xattr_entry *added = xattr_addto_range(blk,
+			((char *) blk) + info.block_size, head,
+			name_index, name, value, value_len);
+	if (added != NULL) {
+		/* Fill in e_hash for the entry that was just written. */
+		ext4_xattr_hash_entry(blk, added);
+		return 0;
+	}
+	return -1;
+}
+
+
+static int xattr_add(u32 inode_num, int name_index, const char *name,
+		const void *value, size_t value_len)
+{
+	/* A missing value means "no attribute to add", which is not an error. */
+	if (value == NULL)
+		return 0;
+
+	struct ext4_inode *target = get_inode(inode_num);
+	if (target == NULL)
+		return -1;
+
+	/* Prefer the space inside the inode itself ... */
+	if (xattr_addto_inode(target, name_index, name, value, value_len) == 0)
+		return 0;
+	/* ... and spill into a separate xattr block only when that fails. */
+	return xattr_addto_block(target, name_index, name, value, value_len);
+}
+
+int inode_set_selinux(u32 inode_num, const char *secon)
+{
+	return !secon ? 0 : xattr_add(inode_num, EXT4_XATTR_INDEX_SECURITY,
+		XATTR_SELINUX_SUFFIX, secon, strlen(secon) + 1); /* NULL secon: no-op, strlen(NULL) never runs */
+}
+
+int inode_set_capabilities(u32 inode_num, uint64_t capabilities) {
+	/* Stores 'capabilities' as the file's permitted capability set; 0 means none. */
+	if (capabilities == 0)
+		return 0;
+
+	struct vfs_cap_data cap_data;
+	memset(&cap_data, 0, sizeof(cap_data));	/* also leaves both inheritable sets at 0 */
+
+	/* The structure is copied into the image verbatim, so store it little-endian. */
+	cap_data.magic_etc = cpu_to_le32(VFS_CAP_REVISION | VFS_CAP_FLAGS_EFFECTIVE);
+	cap_data.data[0].permitted = cpu_to_le32((uint32_t) (capabilities & 0xffffffff));
+	cap_data.data[1].permitted = cpu_to_le32((uint32_t) (capabilities >> 32));
+
+	return xattr_add(inode_num, EXT4_XATTR_INDEX_SECURITY,
+		XATTR_CAPS_SUFFIX, &cap_data, sizeof(cap_data));
+}
+
diff --git a/ext4_utils/contents.h b/ext4_utils/contents.h
index 751c0b3..4272000 100644
--- a/ext4_utils/contents.h
+++ b/ext4_utils/contents.h
@@ -30,6 +30,7 @@
 	u32 *inode;
 	u32 mtime;
 	char *secon;
+	uint64_t capabilities;
 };
 
 u32 make_directory(u32 dir_inode_num, u32 entries, struct dentry *dentries,
@@ -38,4 +39,5 @@
 u32 make_link(const char *link);
 int inode_set_permissions(u32 inode_num, u16 mode, u16 uid, u16 gid, u32 mtime);
 int inode_set_selinux(u32 inode_num, const char *secon);
+int inode_set_capabilities(u32 inode_num, uint64_t capabilities);
 #endif
diff --git a/ext4_utils/ext4_utils.c b/ext4_utils/ext4_utils.c
index 4b87c6e..c3bec96 100644
--- a/ext4_utils/ext4_utils.c
+++ b/ext4_utils/ext4_utils.c
@@ -126,6 +126,7 @@
 	aux_info.bg_desc = calloc(info.block_size, aux_info.bg_desc_blocks);
 	if (!aux_info.bg_desc)
 		critical_error_errno("calloc");
+	aux_info.xattrs = NULL;
 }
 
 void ext4_free_fs_aux_info()
diff --git a/ext4_utils/ext4_utils.h b/ext4_utils/ext4_utils.h
index 0d0b6bc..0a9bd56 100644
--- a/ext4_utils/ext4_utils.h
+++ b/ext4_utils/ext4_utils.h
@@ -36,6 +36,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <setjmp.h>
+#include <stdint.h>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #define lseek64 lseek
@@ -84,6 +85,12 @@
 #define __u16 u16
 #define __u8 u8
 
+/* XXX: byte-order helpers are identity macros, so they are only correct on little-endian hosts */
+#define cpu_to_le32(x) (x)
+#define cpu_to_le16(x) (x)
+#define le32_to_cpu(x) (x)
+#define le16_to_cpu(x) (x)
+
 typedef unsigned long long u64;
 typedef signed long long s64;
 typedef unsigned int u32;
@@ -91,6 +98,7 @@
 typedef unsigned char u8;
 
 struct block_group_info;
+struct xattr_list_element;
 
 struct ext2_group_desc {
 	__le32 bg_block_bitmap;
@@ -130,6 +138,7 @@
 	struct ext4_super_block **backup_sb;
 	struct ext2_group_desc *bg_desc;
 	struct block_group_info *bgs;
+	struct xattr_list_element *xattrs;
 	u32 first_data_block;
 	u64 len_blocks;
 	u32 inode_table_blocks;
@@ -171,7 +180,7 @@
 u16 ext4_crc16(u16 crc_in, const void *buf, int size);
 
 typedef void (*fs_config_func_t)(const char *path, int dir, unsigned *uid, unsigned *gid,
-        unsigned *mode);
+        unsigned *mode, uint64_t *capabilities);
 
 struct selabel_handle;
 
diff --git a/ext4_utils/make_ext4fs.c b/ext4_utils/make_ext4fs.c
index 17b7ae6..c2a2665 100644
--- a/ext4_utils/make_ext4fs.c
+++ b/ext4_utils/make_ext4fs.c
@@ -164,16 +164,18 @@
 		dentries[i].size = stat.st_size;
 		dentries[i].mode = stat.st_mode & (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO);
 		dentries[i].mtime = stat.st_mtime;
+		uint64_t capabilities;
 		if (fs_config_func != NULL) {
 #ifdef ANDROID
 			unsigned int mode = 0;
 			unsigned int uid = 0;
 			unsigned int gid = 0;
 			int dir = S_ISDIR(stat.st_mode);
-			fs_config_func(dentries[i].path, dir, &uid, &gid, &mode);
+			fs_config_func(dentries[i].path, dir, &uid, &gid, &mode, &capabilities);
 			dentries[i].mode = mode;
 			dentries[i].uid = uid;
 			dentries[i].gid = gid;
+			dentries[i].capabilities = capabilities;
 #else
 			error("can't set android permissions - built without android support");
 #endif
@@ -270,9 +272,20 @@
 			dentries[i].mtime);
 		if (ret)
 			error("failed to set permissions on %s\n", dentries[i].path);
+
+		/*
+		 * It's important to call inode_set_selinux() before
+		 * inode_set_capabilities(). Extended attributes need to
+		 * be stored in sorted order, and we guarantee this by making
+		 * the calls in the proper order.
+		 * Please see xattr_assert_sane() in contents.c
+		 */
 		ret = inode_set_selinux(entry_inode, dentries[i].secon);
 		if (ret)
 			error("failed to set SELinux context on %s\n", dentries[i].path);
+		ret = inode_set_capabilities(entry_inode, dentries[i].capabilities);
+		if (ret)
+			error("failed to set capability on %s\n", dentries[i].path);
 
 		free(dentries[i].path);
 		free(dentries[i].full_path);
@@ -502,7 +515,8 @@
 	info.inodes_per_group = compute_inodes_per_group();
 
 	info.feat_compat |=
-			EXT4_FEATURE_COMPAT_RESIZE_INODE;
+			EXT4_FEATURE_COMPAT_RESIZE_INODE |
+			EXT4_FEATURE_COMPAT_EXT_ATTR;
 
 	info.feat_ro_compat |=
 			EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER |
diff --git a/ext4_utils/xattr.h b/ext4_utils/xattr.h
index 2c6d9cc..60c01ce 100644
--- a/ext4_utils/xattr.h
+++ b/ext4_utils/xattr.h
@@ -1,8 +1,24 @@
 #include <sys/types.h>
 
+#ifndef _SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H
+#define _SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H 1
+
 #define EXT4_XATTR_MAGIC 0xEA020000
 #define EXT4_XATTR_INDEX_SECURITY 6
 
+struct ext4_xattr_header {
+    __le32  h_magic;        /* EXT4_XATTR_MAGIC */
+    __le32  h_refcount;     /* set to 1 by get_xattr_block_for_inode() */
+    __le32  h_blocks;       /* set to 1 by get_xattr_block_for_inode() */
+    __le32  h_hash;         /* NOTE(review): appears never to be written by ext4_utils - confirm */
+    __le32  h_checksum;
+    __u32   h_reserved[3];
+};
+
+struct ext4_xattr_ibody_header {
+    __le32  h_magic;        /* EXT4_XATTR_MAGIC; header placed right after struct ext4_inode */
+};
+
 struct ext4_xattr_entry {
     __u8 e_name_len;
     __u8 e_name_index;
@@ -19,5 +35,11 @@
 #define EXT4_XATTR_LEN(name_len) \
     (((name_len) + EXT4_XATTR_ROUND + \
     sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
+#define EXT4_XATTR_NEXT(entry) \
+    ((struct ext4_xattr_entry *)( \
+     (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) /* entry that follows 'entry' */
 #define EXT4_XATTR_SIZE(size) \
     (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) /* first 4 bytes zero => end of entry list */
+
+#endif /* !_SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H */