ext4_utils: add filesystem capabilities support.
Add support for specifying filesystem capabilities when
creating a new filesystem.
The combination of SELinux extended attributes plus
filesystem capability extended attributes is too big
to fit inside one inode entry. Because of this, I added
support to ext4_utils to create an xattr block and link
the inode to that block. We continue to try to fit
everything inside the inode if possible, but fall back to
creating a block if the extended attribute is too big.
Change-Id: I40ebb63975b15ecd8c565486e171b4d50cd4dfaa
diff --git a/ext4_utils/allocate.c b/ext4_utils/allocate.c
index 3229abe..5c60e92 100644
--- a/ext4_utils/allocate.c
+++ b/ext4_utils/allocate.c
@@ -60,6 +60,12 @@
u16 used_dirs;
};
+struct xattr_list_element {
+ struct ext4_inode *inode;
+ struct ext4_xattr_header *header;
+ struct xattr_list_element *next;
+};
+
struct block_allocation *create_allocation()
{
struct block_allocation *alloc = malloc(sizeof(struct block_allocation));
@@ -74,6 +80,25 @@
return alloc;
}
+static struct ext4_xattr_header *xattr_list_find(struct ext4_inode *inode)
+{
+ struct xattr_list_element *element;
+ for (element = aux_info.xattrs; element != NULL; element = element->next) {
+ if (element->inode == inode)
+ return element->header;
+ }
+ return NULL;
+}
+
+static void xattr_list_insert(struct ext4_inode *inode, struct ext4_xattr_header *header)
+{
+ struct xattr_list_element *element = malloc(sizeof(*element));
+ if (element == NULL) /* do not dereference a failed allocation; report it like the calloc() check in ext4_utils.c */
+ critical_error_errno("malloc");
+ *element = (struct xattr_list_element){ inode, header, aux_info.xattrs }; /* becomes the new list head */
+ aux_info.xattrs = element;
+}
+
static void region_list_remove(struct region_list *list, struct region *reg)
{
if (reg->prev)
@@ -673,6 +698,35 @@
info.inode_size);
}
+struct ext4_xattr_header *get_xattr_block_for_inode(struct ext4_inode *inode)
+{
+ struct ext4_xattr_header *block = xattr_list_find(inode);
+ if (block != NULL)
+ return block;
+
+ u32 block_num = allocate_block();
+ block = calloc(info.block_size, 1);
+ if (block == NULL) {
+ error("get_xattr: failed to allocate %d", info.block_size);
+ return NULL;
+ }
+
+ block->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
+ block->h_refcount = cpu_to_le32(1);
+ block->h_blocks = cpu_to_le32(1);
+ inode->i_blocks_lo = cpu_to_le32(le32_to_cpu(inode->i_blocks_lo) + (info.block_size / 512));
+ inode->i_file_acl_lo = cpu_to_le32(block_num);
+
+ int result = sparse_file_add_data(info.sparse_file, block, info.block_size, block_num);
+ if (result != 0) {
+ error("get_xattr: sparse_file_add_data failure %d", result);
+ free(block);
+ return NULL;
+ }
+ xattr_list_insert(inode, block);
+ return block;
+}
+
/* Mark the first len inodes in a block group as used */
u32 reserve_inodes(int bg, u32 num)
{
diff --git a/ext4_utils/allocate.h b/ext4_utils/allocate.h
index 0575e84..7a3ffed 100644
--- a/ext4_utils/allocate.h
+++ b/ext4_utils/allocate.h
@@ -21,6 +21,7 @@
#include "ext4_utils.h"
#include "ext4.h"
+#include "xattr.h"
struct block_allocation;
@@ -31,6 +32,7 @@
int block_allocation_num_regions(struct block_allocation *alloc);
int block_allocation_len(struct block_allocation *alloc);
struct ext4_inode *get_inode(u32 inode);
+struct ext4_xattr_header *get_xattr_block_for_inode(struct ext4_inode *inode);
void reduce_allocation(struct block_allocation *alloc, u32 len);
u32 get_block(struct block_allocation *alloc, u32 block);
u32 get_oob_block(struct block_allocation *alloc, u32 block);
diff --git a/ext4_utils/contents.c b/ext4_utils/contents.c
index 4d45f67..13e0510 100644
--- a/ext4_utils/contents.c
+++ b/ext4_utils/contents.c
@@ -17,6 +17,8 @@
#include <sys/stat.h>
#include <string.h>
#include <stdio.h>
+#include <linux/capability.h>
+#include <linux/xattr.h>
#include "ext4_utils.h"
#include "ext4.h"
@@ -242,44 +244,228 @@
return 0;
}
-#define XATTR_SELINUX_SUFFIX "selinux"
-
-/* XXX */
-#define cpu_to_le32(x) (x)
-#define cpu_to_le16(x) (x)
-
-int inode_set_selinux(u32 inode_num, const char *secon)
+/*
+ * Returns the amount of free space available in the specified
+ * xattr region
+ */
+static size_t xattr_free_space(struct ext4_xattr_entry *entry, char *end)
{
- struct ext4_inode *inode = get_inode(inode_num);
- u32 *hdr;
- struct ext4_xattr_entry *entry;
- size_t name_len = strlen(XATTR_SELINUX_SUFFIX);
- size_t value_len;
- size_t size, min_offs;
- char *val;
+ while(!IS_LAST_ENTRY(entry) && (((char *) entry) < end)) {
+ end -= EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
+ entry = EXT4_XATTR_NEXT(entry);
+ }
- if (!secon)
+ if (((char *) entry) > end) {
+ error("unexpected read beyond end of xattr space");
return 0;
+ }
- if (!inode)
+ return end - ((char *) entry);
+}
+
+/*
+ * Returns a pointer to the free space immediately after the
+ * last xattr element
+ */
+static struct ext4_xattr_entry* xattr_get_last(struct ext4_xattr_entry *entry)
+{
+ for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ // skip entry
+ }
+ return entry;
+}
+
+/*
+ * assert that the elements in the ext4 xattr section are in sorted order
+ *
+ * The ext4 filesystem requires extended attributes to be sorted when
+ * they're not stored in the inode. The kernel ext4 code uses the following
+ * sorting algorithm:
+ *
+ * 1) First sort extended attributes by their name_index. For example,
+ * EXT4_XATTR_INDEX_USER (1) comes before EXT4_XATTR_INDEX_SECURITY (6).
+ * 2) If the name_indexes are equal, then sorting is based on the length
+ * of the name. For example, XATTR_SELINUX_SUFFIX ("selinux") comes before
+ * XATTR_CAPS_SUFFIX ("capability") because "selinux" is shorter than "capability"
+ * 3) If the name_index and name_length are equal, then memcmp() is used to determine
+ * which name comes first. For example, "selinux" would come before "yelinux".
+ *
+ * This method is intended to implement the sorting function defined in
+ * the Linux kernel file fs/ext4/xattr.c function ext4_xattr_find_entry().
+ */
+static void xattr_assert_sane(struct ext4_xattr_entry *entry)
+{
+ for( ; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+ if (IS_LAST_ENTRY(next)) {
+ return;
+ }
+
+ int cmp = next->e_name_index - entry->e_name_index;
+ if (cmp == 0)
+ cmp = next->e_name_len - entry->e_name_len;
+ if (cmp == 0)
+ cmp = memcmp(next->e_name, entry->e_name, next->e_name_len);
+ if (cmp < 0) {
+ error("BUG: extended attributes are not sorted\n");
+ return;
+ }
+ if (cmp == 0) {
+ error("BUG: duplicate extended attributes detected\n");
+ return;
+ }
+ }
+}
+
+#define NAME_HASH_SHIFT 5
+#define VALUE_HASH_SHIFT 16
+
+static void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
+ struct ext4_xattr_entry *entry)
+{
+ __u32 hash = 0;
+ char *name = entry->e_name;
+ int n;
+
+ for (n = 0; n < entry->e_name_len; n++) {
+ hash = (hash << NAME_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
+ *name++;
+ }
+
+ if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+ __le32 *value = (__le32 *)((char *)header +
+ le16_to_cpu(entry->e_value_offs));
+ for (n = (le32_to_cpu(entry->e_value_size) +
+ EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
+ hash = (hash << VALUE_HASH_SHIFT) ^
+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
+ le32_to_cpu(*value++);
+ }
+ }
+ entry->e_hash = cpu_to_le32(hash);
+}
+
+#undef NAME_HASH_SHIFT
+#undef VALUE_HASH_SHIFT
+
+static struct ext4_xattr_entry* xattr_addto_range(
+ void *block_start,
+ void *block_end,
+ struct ext4_xattr_entry *first,
+ int name_index,
+ const char *name,
+ const void *value,
+ size_t value_len)
+{
+ /* Appends one name/value pair to the entry list at first; returns the new entry, or NULL if it does not fit. */
+ size_t name_len = strlen(name);
+ if (name_len > 255) /* e_name_len is a __u8 */
+ return NULL;
+ /* The free space must hold the entry, its value, and the zero word that IS_LAST_ENTRY() stops on. */
+ size_t available_size = xattr_free_space(first, block_end);
+ size_t needed_size = EXT4_XATTR_LEN(name_len) + sizeof(__u32) + EXT4_XATTR_SIZE(value_len);
+ if (needed_size > available_size)
+ return NULL;
+
+ struct ext4_xattr_entry *new_entry = xattr_get_last(first);
+ memset(new_entry, 0, EXT4_XATTR_LEN(name_len));
+
+ new_entry->e_name_len = name_len;
+ new_entry->e_name_index = name_index;
+ memcpy(new_entry->e_name, name, name_len);
+ new_entry->e_value_block = 0;
+ new_entry->e_value_size = cpu_to_le32(value_len);
+ /* The value goes at the top of the remaining free space; its offset is stored relative to block_start. */
+ char *val = (char *) new_entry + available_size - EXT4_XATTR_SIZE(value_len);
+ size_t e_value_offs = val - (char *) block_start;
+
+ new_entry->e_value_offs = cpu_to_le16(e_value_offs);
+ memset(val, 0, EXT4_XATTR_SIZE(value_len));
+ memcpy(val, value, value_len);
+
+ xattr_assert_sane(first);
+ return new_entry;
+}
+
+static int xattr_addto_inode(struct ext4_inode *inode, int name_index,
+ const char *name, const void *value, size_t value_len)
+{
+ struct ext4_xattr_ibody_header *hdr = (struct ext4_xattr_ibody_header *) (inode + 1);
+ struct ext4_xattr_entry *first = (struct ext4_xattr_entry *) (hdr + 1);
+ char *block_end = ((char *) inode) + info.inode_size;
+
+ struct ext4_xattr_entry *result =
+ xattr_addto_range(first, block_end, first, name_index, name, value, value_len);
+
+ if (result == NULL)
return -1;
- hdr = (u32 *) (inode + 1);
- *hdr = cpu_to_le32(EXT4_XATTR_MAGIC);
- entry = (struct ext4_xattr_entry *) (hdr+1);
- memset(entry, 0, EXT4_XATTR_LEN(name_len));
- entry->e_name_index = EXT4_XATTR_INDEX_SECURITY;
- entry->e_name_len = name_len;
- memcpy(entry->e_name, XATTR_SELINUX_SUFFIX, name_len);
- value_len = strlen(secon)+1;
- entry->e_value_size = cpu_to_le32(value_len);
- min_offs = (char *)inode + info.inode_size - (char*) entry;
- size = EXT4_XATTR_SIZE(value_len);
- val = (char *)entry + min_offs - size;
- entry->e_value_offs = cpu_to_le16(min_offs - size);
- memset(val + size - EXT4_XATTR_PAD, 0, EXT4_XATTR_PAD);
- memcpy(val, secon, value_len);
+ hdr->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
inode->i_extra_isize = cpu_to_le16(sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE);
return 0;
}
+
+static int xattr_addto_block(struct ext4_inode *inode, int name_index,
+ const char *name, const void *value, size_t value_len)
+{
+ struct ext4_xattr_header *header = get_xattr_block_for_inode(inode);
+ if (!header)
+ return -1;
+
+ struct ext4_xattr_entry *first = (struct ext4_xattr_entry *) (header + 1);
+ char *block_end = ((char *) header) + info.block_size;
+
+ struct ext4_xattr_entry *result =
+ xattr_addto_range(header, block_end, first, name_index, name, value, value_len);
+
+ if (result == NULL)
+ return -1;
+
+ ext4_xattr_hash_entry(header, result);
+ return 0;
+}
+
+
+static int xattr_add(u32 inode_num, int name_index, const char *name,
+ const void *value, size_t value_len)
+{
+ if (!value)
+ return 0;
+
+ struct ext4_inode *inode = get_inode(inode_num);
+
+ if (!inode)
+ return -1;
+
+ int result = xattr_addto_inode(inode, name_index, name, value, value_len);
+ if (result != 0) {
+ result = xattr_addto_block(inode, name_index, name, value, value_len);
+ }
+ return result;
+}
+
+int inode_set_selinux(u32 inode_num, const char *secon)
+{
+ if (!secon) return 0; /* no context to store; must be tested here, before strlen() is evaluated on it */
+ return xattr_add(inode_num, EXT4_XATTR_INDEX_SECURITY, XATTR_SELINUX_SUFFIX, secon, strlen(secon) + 1);
+}
+
+int inode_set_capabilities(u32 inode_num, uint64_t capabilities) {
+ if (capabilities == 0)
+ return 0;
+
+ struct vfs_cap_data cap_data;
+ memset(&cap_data, 0, sizeof(cap_data));
+
+ cap_data.magic_etc = VFS_CAP_REVISION | VFS_CAP_FLAGS_EFFECTIVE;
+ cap_data.data[0].permitted = (uint32_t) (capabilities & 0xffffffff);
+ cap_data.data[0].inheritable = 0;
+ cap_data.data[1].permitted = (uint32_t) (capabilities >> 32);
+ cap_data.data[1].inheritable = 0;
+
+ return xattr_add(inode_num, EXT4_XATTR_INDEX_SECURITY,
+ XATTR_CAPS_SUFFIX, &cap_data, sizeof(cap_data));
+}
+
diff --git a/ext4_utils/contents.h b/ext4_utils/contents.h
index 751c0b3..4272000 100644
--- a/ext4_utils/contents.h
+++ b/ext4_utils/contents.h
@@ -30,6 +30,7 @@
u32 *inode;
u32 mtime;
char *secon;
+ uint64_t capabilities;
};
u32 make_directory(u32 dir_inode_num, u32 entries, struct dentry *dentries,
@@ -38,4 +39,5 @@
u32 make_link(const char *link);
int inode_set_permissions(u32 inode_num, u16 mode, u16 uid, u16 gid, u32 mtime);
int inode_set_selinux(u32 inode_num, const char *secon);
+int inode_set_capabilities(u32 inode_num, uint64_t capabilities);
#endif
diff --git a/ext4_utils/ext4_utils.c b/ext4_utils/ext4_utils.c
index 4b87c6e..c3bec96 100644
--- a/ext4_utils/ext4_utils.c
+++ b/ext4_utils/ext4_utils.c
@@ -126,6 +126,7 @@
aux_info.bg_desc = calloc(info.block_size, aux_info.bg_desc_blocks);
if (!aux_info.bg_desc)
critical_error_errno("calloc");
+ aux_info.xattrs = NULL;
}
void ext4_free_fs_aux_info()
diff --git a/ext4_utils/ext4_utils.h b/ext4_utils/ext4_utils.h
index 0d0b6bc..0a9bd56 100644
--- a/ext4_utils/ext4_utils.h
+++ b/ext4_utils/ext4_utils.h
@@ -36,6 +36,7 @@
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
+#include <stdint.h>
#if defined(__APPLE__) && defined(__MACH__)
#define lseek64 lseek
@@ -84,6 +85,12 @@
#define __u16 u16
#define __u8 u8
+/* XXX */
+#define cpu_to_le32(x) (x)
+#define cpu_to_le16(x) (x)
+#define le32_to_cpu(x) (x)
+#define le16_to_cpu(x) (x)
+
typedef unsigned long long u64;
typedef signed long long s64;
typedef unsigned int u32;
@@ -91,6 +98,7 @@
typedef unsigned char u8;
struct block_group_info;
+struct xattr_list_element;
struct ext2_group_desc {
__le32 bg_block_bitmap;
@@ -130,6 +138,7 @@
struct ext4_super_block **backup_sb;
struct ext2_group_desc *bg_desc;
struct block_group_info *bgs;
+ struct xattr_list_element *xattrs;
u32 first_data_block;
u64 len_blocks;
u32 inode_table_blocks;
@@ -171,7 +180,7 @@
u16 ext4_crc16(u16 crc_in, const void *buf, int size);
typedef void (*fs_config_func_t)(const char *path, int dir, unsigned *uid, unsigned *gid,
- unsigned *mode);
+ unsigned *mode, uint64_t *capabilities);
struct selabel_handle;
diff --git a/ext4_utils/make_ext4fs.c b/ext4_utils/make_ext4fs.c
index 17b7ae6..c2a2665 100644
--- a/ext4_utils/make_ext4fs.c
+++ b/ext4_utils/make_ext4fs.c
@@ -164,16 +164,18 @@
dentries[i].size = stat.st_size;
dentries[i].mode = stat.st_mode & (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO);
dentries[i].mtime = stat.st_mtime;
+ uint64_t capabilities;
if (fs_config_func != NULL) {
#ifdef ANDROID
unsigned int mode = 0;
unsigned int uid = 0;
unsigned int gid = 0;
int dir = S_ISDIR(stat.st_mode);
- fs_config_func(dentries[i].path, dir, &uid, &gid, &mode);
+ fs_config_func(dentries[i].path, dir, &uid, &gid, &mode, &capabilities);
dentries[i].mode = mode;
dentries[i].uid = uid;
dentries[i].gid = gid;
+ dentries[i].capabilities = capabilities;
#else
error("can't set android permissions - built without android support");
#endif
@@ -270,9 +272,20 @@
dentries[i].mtime);
if (ret)
error("failed to set permissions on %s\n", dentries[i].path);
+
+ /*
+ * It's important to call inode_set_selinux() before
+ * inode_set_capabilities(). Extended attributes need to
+ * be stored in sorted order, and we guarantee this by making
+ * the calls in the proper order.
+ * Please see xattr_assert_sane() in contents.c
+ */
ret = inode_set_selinux(entry_inode, dentries[i].secon);
if (ret)
error("failed to set SELinux context on %s\n", dentries[i].path);
+ ret = inode_set_capabilities(entry_inode, dentries[i].capabilities);
+ if (ret)
+ error("failed to set capability on %s\n", dentries[i].path);
free(dentries[i].path);
free(dentries[i].full_path);
@@ -502,7 +515,8 @@
info.inodes_per_group = compute_inodes_per_group();
info.feat_compat |=
- EXT4_FEATURE_COMPAT_RESIZE_INODE;
+ EXT4_FEATURE_COMPAT_RESIZE_INODE |
+ EXT4_FEATURE_COMPAT_EXT_ATTR;
info.feat_ro_compat |=
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER |
diff --git a/ext4_utils/xattr.h b/ext4_utils/xattr.h
index 2c6d9cc..60c01ce 100644
--- a/ext4_utils/xattr.h
+++ b/ext4_utils/xattr.h
@@ -1,8 +1,24 @@
#include <sys/types.h>
+#ifndef _SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H
+#define _SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H 1
+
#define EXT4_XATTR_MAGIC 0xEA020000
#define EXT4_XATTR_INDEX_SECURITY 6
+struct ext4_xattr_header {
+ __le32 h_magic;
+ __le32 h_refcount;
+ __le32 h_blocks;
+ __le32 h_hash;
+ __le32 h_checksum;
+ __u32 h_reserved[3];
+};
+
+struct ext4_xattr_ibody_header {
+ __le32 h_magic;
+};
+
struct ext4_xattr_entry {
__u8 e_name_len;
__u8 e_name_index;
@@ -19,5 +35,11 @@
#define EXT4_XATTR_LEN(name_len) \
(((name_len) + EXT4_XATTR_ROUND + \
sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
+#define EXT4_XATTR_NEXT(entry) \
+ ((struct ext4_xattr_entry *)( \
+ (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
+
+#endif /* !_SYSTEM_EXTRAS_EXT4_UTILS_XATTR_H */