--- share/man/man9/osd.9.orig +++ share/man/man9/osd.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 5, 2011 +.Dd March 30, 2016 .Dt OSD 9 .Os .Sh NAME @@ -33,6 +33,9 @@ .Nm osd_register , .Nm osd_deregister , .Nm osd_set , +.Nm osd_reserve , +.Nm osd_set_reserved , +.Nm osd_free_reserved , .Nm osd_get , .Nm osd_del , .Nm osd_call , @@ -63,6 +66,22 @@ .Fa "void *value" .Fc .Ft void * +.Fo osd_reserve +.Fa "u_int slot" +.Fc +.Ft int +.Fo osd_set_reserved +.Fa "u_int type" +.Fa "struct osd *osd" +.Fa "u_int slot" +.Fa "void *rsv" +.Fa "void *value" +.Fc +.Ft void +.Fo osd_free_reserved +.Fa "void *rsv" +.Fc +.Ft void * .Fo osd_get .Fa "u_int type" .Fa "struct osd *osd" @@ -198,6 +217,15 @@ .Fa osd . .Pp The +.Fn osd_set_reserved +function does the same as +.Fn osd_set , +but with an extra argument +.Fa rsv +that is internal-use memory previously allocated via +.Fn osd_reserve . +.Pp +The .Fn osd_get function returns the data pointer associated with a kernel data structure's .Vt struct osd @@ -324,6 +352,24 @@ .Xr realloc 9 calls. .Pp +It is possible for +.Fn osd_set +to fail to allocate this array. To ensure that such allocation succeeds, +.Fn osd_reserve +may be called (in a non-blocking context), and it will pre-allocate the +memory via +.Xr malloc 9 +with M_WAITOK. +Then this pre-allocated memory is passed to +.Fn osd_set_reserved , +which will use it if necessary or otherwise discard it. +The memory may also be explicitly discarded by calling +.Fn osd_free_reserved . +As this method always allocates memory whether or not it is ultimately needed, +it should be used only rarely, such as in the unlikely event that +.Fn osd_set +fails. +.Pp The .Nm API is geared towards slot identifiers storing pointers to the same underlying @@ -359,15 +405,27 @@ returns the slot identifier for the newly registered data type. .Pp .Fn osd_set -returns zero on success or ENOMEM if the specified type/slot identifier pair +and +.Fn osd_set_reserved +return zero on success or ENOMEM if the specified type/slot identifier pair triggered an internal .Xr realloc 9 -which failed. +which failed +.Fn ( osd_set_reserved +will always succeed when +.Fa rsv +is non-NULL). .Pp .Fn osd_get returns the data pointer for the specified type/slot identifier pair, or NULL if the slot has not been initialised yet. .Pp +.Fn osd_reserve +returns a pointer suitable for passing to +.Fn osd_set_reserved +or +.Fn osd_free_reserved . +.Pp .Fn osd_call returns zero if no method is run or the method for each slot runs successfully. If a method for a slot returns non-zero, --- sys/kern/kern_osd.c.orig +++ sys/kern/kern_osd.c @@ -44,6 +44,23 @@ /* OSD (Object Specific Data) */ +/* + * Lock key: + * (m) osd_module_lock + * (o) osd_object_lock + * (l) osd_list_lock + */ +struct osd_master { + struct sx osd_module_lock; + struct rmlock osd_object_lock; + struct mtx osd_list_lock; + LIST_HEAD(, osd) osd_list; /* (l) */ + osd_destructor_t *osd_destructors; /* (o) */ + osd_method_t *osd_methods; /* (m) */ + u_int osd_ntslots; /* (m) */ + const u_int osd_nmethods; +}; + static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data"); static int osd_debug = 0; @@ -62,25 +79,12 @@ int list_locked); /* - * Lists of objects with OSD. - * - * Lock key: - * (m) osd_module_lock - * (o) osd_object_lock - * (l) osd_list_lock + * List of objects with OSD. */ -static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */ -static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */ -static u_int osd_nslots[OSD_LAST + 1]; /* (m) */ -static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */ -static const u_int osd_nmethods[OSD_LAST + 1] = { - [OSD_JAIL] = PR_MAXMETHOD, +struct osd_master osdm[OSD_LAST + 1] = { + [OSD_JAIL] = { .osd_nmethods = PR_MAXMETHOD }, }; -static struct sx osd_module_lock[OSD_LAST + 1]; -static struct rmlock osd_object_lock[OSD_LAST + 1]; -static struct mtx osd_list_lock[OSD_LAST + 1]; - static void osd_default_destructor(void *value __unused) { @@ -102,12 +106,12 @@ if (destructor == NULL) destructor = osd_default_destructor; - sx_xlock(&osd_module_lock[type]); + sx_xlock(&osdm[type].osd_module_lock); /* * First, we try to find unused slot. */ - for (i = 0; i < osd_nslots[type]; i++) { - if (osd_destructors[type][i] == NULL) { + for (i = 0; i < osdm[type].osd_ntslots; i++) { + if (osdm[type].osd_destructors[i] == NULL) { OSD_DEBUG("Unused slot found (type=%u, slot=%u).", type, i); break; @@ -116,31 +120,31 @@ /* * If no unused slot was found, allocate one. */ - if (i == osd_nslots[type]) { - osd_nslots[type]++; - if (osd_nmethods[type] != 0) - osd_methods[type] = realloc(osd_methods[type], - sizeof(osd_method_t) * osd_nslots[type] * - osd_nmethods[type], M_OSD, M_WAITOK); - newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type], - M_OSD, M_WAITOK); - rm_wlock(&osd_object_lock[type]); - bcopy(osd_destructors[type], newptr, + if (i == osdm[type].osd_ntslots) { + osdm[type].osd_ntslots++; + if (osdm[type].osd_nmethods != 0) + osdm[type].osd_methods = realloc(osdm[type].osd_methods, + sizeof(osd_method_t) * osdm[type].osd_ntslots * + osdm[type].osd_nmethods, M_OSD, M_WAITOK); + newptr = malloc(sizeof(osd_destructor_t) * + osdm[type].osd_ntslots, M_OSD, M_WAITOK); + rm_wlock(&osdm[type].osd_object_lock); + bcopy(osdm[type].osd_destructors, newptr, sizeof(osd_destructor_t) * i); - free(osd_destructors[type], M_OSD); - osd_destructors[type] = newptr; - rm_wunlock(&osd_object_lock[type]); + free(osdm[type].osd_destructors, M_OSD); + osdm[type].osd_destructors = newptr; + rm_wunlock(&osdm[type].osd_object_lock); OSD_DEBUG("New slot allocated (type=%u, slot=%u).", type, i + 1); } - osd_destructors[type][i] = destructor; - if (osd_nmethods[type] != 0) { - for (m = 0; m < osd_nmethods[type]; m++) - osd_methods[type][i * osd_nmethods[type] + m] = - methods != NULL ? methods[m] : NULL; + osdm[type].osd_destructors[i] = destructor; + if (osdm[type].osd_nmethods != 0) { + for (m = 0; m < osdm[type].osd_nmethods; m++) + osdm[type].osd_methods[i * osdm[type].osd_nmethods + m] + = methods != NULL ? methods[m] : NULL; } - sx_xunlock(&osd_module_lock[type]); + sx_xunlock(&osdm[type].osd_module_lock); return (i + 1); } @@ -151,37 +155,37 @@ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); KASSERT(slot > 0, ("Invalid slot.")); - KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot.")); - sx_xlock(&osd_module_lock[type]); - rm_wlock(&osd_object_lock[type]); + sx_xlock(&osdm[type].osd_module_lock); + rm_wlock(&osdm[type].osd_object_lock); /* * Free all OSD for the given slot. */ - mtx_lock(&osd_list_lock[type]); - LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd) + mtx_lock(&osdm[type].osd_list_lock); + LIST_FOREACH_SAFE(osd, &osdm[type].osd_list, osd_next, tosd) do_osd_del(type, osd, slot, 1); - mtx_unlock(&osd_list_lock[type]); + mtx_unlock(&osdm[type].osd_list_lock); /* * Set destructor to NULL to free the slot. */ - osd_destructors[type][slot - 1] = NULL; - if (slot == osd_nslots[type]) { - osd_nslots[type]--; - osd_destructors[type] = realloc(osd_destructors[type], - sizeof(osd_destructor_t) * osd_nslots[type], M_OSD, + osdm[type].osd_destructors[slot - 1] = NULL; + if (slot == osdm[type].osd_ntslots) { + osdm[type].osd_ntslots--; + osdm[type].osd_destructors = realloc(osdm[type].osd_destructors, + sizeof(osd_destructor_t) * osdm[type].osd_ntslots, M_OSD, M_NOWAIT | M_ZERO); - if (osd_nmethods[type] != 0) - osd_methods[type] = realloc(osd_methods[type], - sizeof(osd_method_t) * osd_nslots[type] * - osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO); + if (osdm[type].osd_nmethods != 0) + osdm[type].osd_methods = realloc(osdm[type].osd_methods, + sizeof(osd_method_t) * osdm[type].osd_ntslots * + osdm[type].osd_nmethods, M_OSD, M_NOWAIT | M_ZERO); /* * We always reallocate to smaller size, so we assume it will * always succeed. */ - KASSERT(osd_destructors[type] != NULL && - (osd_nmethods[type] == 0 || osd_methods[type] != NULL), - ("realloc() failed")); + KASSERT(osdm[type].osd_destructors != NULL && + (osdm[type].osd_nmethods == 0 || + osdm[type].osd_methods != NULL), ("realloc() failed")); OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).", type, slot); } else { @@ -188,68 +192,105 @@ OSD_DEBUG("Slot deregistration (type=%u, slot=%u).", type, slot); } - rm_wunlock(&osd_object_lock[type]); - sx_xunlock(&osd_module_lock[type]); + rm_wunlock(&osdm[type].osd_object_lock); + sx_xunlock(&osdm[type].osd_module_lock); } int osd_set(u_int type, struct osd *osd, u_int slot, void *value) { + + return (osd_set_reserved(type, osd, slot, NULL, value)); +} + +void * +osd_reserve(u_int slot) +{ + + KASSERT(slot > 0, ("Invalid slot.")); + + OSD_DEBUG("Reserving slot array (slot=%u).", slot); + return (malloc(sizeof(void *) * slot, M_OSD, M_WAITOK | M_ZERO)); +} + +int +osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv, + void *value) +{ struct rm_priotracker tracker; KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); KASSERT(slot > 0, ("Invalid slot.")); - KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot.")); - rm_rlock(&osd_object_lock[type], &tracker); + rm_rlock(&osdm[type].osd_object_lock, &tracker); if (slot > osd->osd_nslots) { + void *newptr; + if (value == NULL) { OSD_DEBUG( "Not allocating null slot (type=%u, slot=%u).", type, slot); - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, &tracker); + if (rsv) + osd_free_reserved(rsv); return (0); - } else if (osd->osd_nslots == 0) { + } + + /* + * Too few slots allocated here, so we need to extend or create + * the array. + */ + if (rsv) { /* - * First OSD for this object, so we need to allocate - * space and put it onto the list. + * Use the reserve passed in (assumed to be + * the right size). */ - osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD, - M_NOWAIT | M_ZERO); - if (osd->osd_slots == NULL) { - rm_runlock(&osd_object_lock[type], &tracker); - return (ENOMEM); + newptr = rsv; + if (osd->osd_nslots != 0) { + memcpy(newptr, osd->osd_slots, + sizeof(void *) * osd->osd_nslots); + free(osd->osd_slots, M_OSD); } - osd->osd_nslots = slot; - mtx_lock(&osd_list_lock[type]); - LIST_INSERT_HEAD(&osd_list[type], osd, osd_next); - mtx_unlock(&osd_list_lock[type]); - OSD_DEBUG("Setting first slot (type=%u).", type); } else { - void *newptr; - - /* - * Too few slots allocated here, needs to extend - * the array. - */ newptr = realloc(osd->osd_slots, sizeof(void *) * slot, M_OSD, M_NOWAIT | M_ZERO); if (newptr == NULL) { - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, + &tracker); return (ENOMEM); } - osd->osd_slots = newptr; - osd->osd_nslots = slot; + } + if (osd->osd_nslots == 0) { + /* + * First OSD for this object, so we need to put it + * onto the list. + */ + mtx_lock(&osdm[type].osd_list_lock); + LIST_INSERT_HEAD(&osdm[type].osd_list, osd, osd_next); + mtx_unlock(&osdm[type].osd_list_lock); + OSD_DEBUG("Setting first slot (type=%u).", type); + } else OSD_DEBUG("Growing slots array (type=%u).", type); - } - } + osd->osd_slots = newptr; + osd->osd_nslots = slot; + } else if (rsv) + osd_free_reserved(rsv); OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type, slot, value); osd->osd_slots[slot - 1] = value; - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, &tracker); return (0); } +void +osd_free_reserved(void *rsv) +{ + + OSD_DEBUG("Discarding reserved slot array."); + free(rsv, M_OSD); +} + void * osd_get(u_int type, struct osd *osd, u_int slot) { @@ -258,9 +299,9 @@ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); KASSERT(slot > 0, ("Invalid slot.")); - KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot.")); - rm_rlock(&osd_object_lock[type], &tracker); + rm_rlock(&osdm[type].osd_object_lock, &tracker); if (slot > osd->osd_nslots) { value = NULL; OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot); @@ -269,7 +310,7 @@ OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).", type, slot, value); } - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, &tracker); return (value); } @@ -278,9 +319,9 @@ { struct rm_priotracker tracker; - rm_rlock(&osd_object_lock[type], &tracker); + rm_rlock(&osdm[type].osd_object_lock, &tracker); do_osd_del(type, osd, slot, 0); - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, &tracker); } static void @@ -290,7 +331,7 @@ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); KASSERT(slot > 0, ("Invalid slot.")); - KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + KASSERT(osdm[type].osd_destructors[slot - 1] != NULL, ("Unused slot.")); OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot); @@ -299,7 +340,7 @@ return; } if (osd->osd_slots[slot - 1] != NULL) { - osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]); + osdm[type].osd_destructors[slot - 1](osd->osd_slots[slot - 1]); osd->osd_slots[slot - 1] = NULL; } for (i = osd->osd_nslots - 1; i >= 0; i--) { @@ -313,10 +354,10 @@ /* No values left for this object. */ OSD_DEBUG("No more slots left (type=%u).", type); if (!list_locked) - mtx_lock(&osd_list_lock[type]); + mtx_lock(&osdm[type].osd_list_lock); LIST_REMOVE(osd, osd_next); if (!list_locked) - mtx_unlock(&osd_list_lock[type]); + mtx_unlock(&osdm[type].osd_list_lock); free(osd->osd_slots, M_OSD); osd->osd_slots = NULL; osd->osd_nslots = 0; @@ -342,7 +383,7 @@ int error, i; KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); - KASSERT(method < osd_nmethods[type], ("Invalid method.")); + KASSERT(method < osdm[type].osd_nmethods, ("Invalid method.")); /* * Call this method for every slot that defines it, stopping if an @@ -349,14 +390,14 @@ * error is encountered. */ error = 0; - sx_slock(&osd_module_lock[type]); - for (i = 0; i < osd_nslots[type]; i++) { - methodfun = - osd_methods[type][i * osd_nmethods[type] + method]; + sx_slock(&osdm[type].osd_module_lock); + for (i = 0; i < osdm[type].osd_ntslots; i++) { + methodfun = osdm[type].osd_methods[i * osdm[type].osd_nmethods + + method]; if (methodfun != NULL && (error = methodfun(obj, data)) != 0) break; } - sx_sunlock(&osd_module_lock[type]); + sx_sunlock(&osdm[type].osd_module_lock); return (error); } @@ -374,14 +415,14 @@ return; } - rm_rlock(&osd_object_lock[type], &tracker); + rm_rlock(&osdm[type].osd_object_lock, &tracker); for (i = 1; i <= osd->osd_nslots; i++) { - if (osd_destructors[type][i - 1] != NULL) + if (osdm[type].osd_destructors[i - 1] != NULL) do_osd_del(type, osd, i, 0); else OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i); } - rm_runlock(&osd_object_lock[type], &tracker); + rm_runlock(&osdm[type].osd_object_lock, &tracker); OSD_DEBUG("Object exit (type=%u).", type); } @@ -391,13 +432,13 @@ u_int i; for (i = OSD_FIRST; i <= OSD_LAST; i++) { - osd_nslots[i] = 0; - LIST_INIT(&osd_list[i]); - sx_init(&osd_module_lock[i], "osd_module"); - rm_init(&osd_object_lock[i], "osd_object"); - mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF); - osd_destructors[i] = NULL; - osd_methods[i] = NULL; + sx_init(&osdm[i].osd_module_lock, "osd_module"); + rm_init(&osdm[i].osd_object_lock, "osd_object"); + mtx_init(&osdm[i].osd_list_lock, "osd_list", NULL, MTX_DEF); + LIST_INIT(&osdm[i].osd_list); + osdm[i].osd_destructors = NULL; + osdm[i].osd_ntslots = 0; + osdm[i].osd_methods = NULL; } } SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL); --- sys/kern/uipc_mqueue.c.orig +++ sys/kern/uipc_mqueue.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -60,8 +61,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -131,6 +132,7 @@ LIST_HEAD(,mqfs_node) mn_children; LIST_ENTRY(mqfs_node) mn_sibling; LIST_HEAD(,mqfs_vdata) mn_vnodes; + const void *mn_pr_root; int mn_refcount; mqfs_type_t mn_type; int mn_deleted; @@ -151,6 +153,11 @@ #define FPTOMQ(fp) ((struct mqueue *)(((struct mqfs_node *) \ (fp)->f_data)->mn_data)) +struct mqfs_osd { + struct task mo_task; + const void *mo_pr_root; +}; + TAILQ_HEAD(msgq, mqueue_msg); struct mqueue; @@ -218,6 +225,7 @@ static uma_zone_t mqnoti_zone; static struct vop_vector mqfs_vnodeops; static struct fileops mqueueops; +static unsigned mqfs_osd_jail_slot; /* * Directory structure construction and manipulation @@ -235,6 +243,9 @@ static void mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn); static void mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn); static int mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn); +static int mqfs_prison_create(void *obj, void *data); +static void mqfs_prison_destructor(void *data); +static void mqfs_prison_remove_task(void *context, int pending); /* * Message queue construction and maniplation @@ -435,6 +446,7 @@ node = mqnode_alloc(); strncpy(node->mn_name, name, namelen); + node->mn_pr_root = cred->cr_prison->pr_root; node->mn_type = nodetype; node->mn_refcount = 1; vfs_timestamp(&node->mn_birth); @@ -643,6 +655,10 @@ { struct mqfs_node *root; struct mqfs_info *mi; + struct prison *pr; + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_CREATE] = mqfs_prison_create, + }; mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); @@ -669,6 +685,13 @@ EVENTHANDLER_PRI_ANY); mq_fdclose = mqueue_fdclose; p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING); + + /* Note current jails. */ + mqfs_osd_jail_slot = osd_jail_register(mqfs_prison_destructor, methods); + sx_slock(&allprison_lock); + TAILQ_FOREACH(pr, &allprison, pr_list) + (void)mqfs_prison_create(pr, NULL); + sx_sunlock(&allprison_lock); return (0); } @@ -678,10 +701,14 @@ static int mqfs_uninit(struct vfsconf *vfc) { + unsigned slot; struct mqfs_info *mi; if (!unloadable) return (EOPNOTSUPP); + slot = mqfs_osd_jail_slot; + mqfs_osd_jail_slot = 0; + osd_jail_deregister(slot); EVENTHANDLER_DEREGISTER(process_exit, exit_tag); mi = &mqfs_data; mqfs_destroy(mi->mi_root); @@ -799,13 +826,17 @@ * Search a directory entry */ static struct mqfs_node * -mqfs_search(struct mqfs_node *pd, const char *name, int len) +mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred) { struct mqfs_node *pn; + const void *pr_root; sx_assert(&pd->mn_info->mi_lock, SX_LOCKED); + pr_root = cred->cr_prison->pr_root; LIST_FOREACH(pn, &pd->mn_children, mn_sibling) { - if (strncmp(pn->mn_name, name, len) == 0 && + /* Only match names within the same prison root directory */ + if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) && + strncmp(pn->mn_name, name, len) == 0 && pn->mn_name[len] == '\0') return (pn); } @@ -877,7 +908,7 @@ /* named node */ sx_xlock(&mqfs->mi_lock); - pn = mqfs_search(pd, pname, namelen); + pn = mqfs_search(pd, pname, namelen, cnp->cn_cred); if (pn != NULL) mqnode_addref(pn); sx_xunlock(&mqfs->mi_lock); @@ -1362,6 +1393,7 @@ struct mqfs_node *pn; struct dirent entry; struct uio *uio; + const void *pr_root; int *tmp_ncookies = NULL; off_t offset; int error, i; @@ -1386,10 +1418,18 @@ error = 0; offset = 0; + pr_root = ap->a_cred->cr_prison->pr_root; sx_xlock(&mi->mi_lock); LIST_FOREACH(pn, &pd->mn_children, mn_sibling) { entry.d_reclen = sizeof(entry); + + /* + * Only show names within the same prison root directory + * (or not associated with a prison, e.g. "." and ".."). + */ + if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root) + continue; if (!pn->mn_fileno) mqfs_fileno_alloc(mi, pn); entry.d_fileno = pn->mn_fileno; @@ -1522,7 +1562,82 @@ #endif /* notyet */ + /* + * Set a destructor task with the prison's root + */ +static int +mqfs_prison_create(void *obj, void *data __unused) +{ + struct prison *pr = obj; + struct mqfs_osd *mo; + void *rsv; + + if (pr->pr_root == pr->pr_parent->pr_root) + return(0); + + mo = malloc(sizeof(struct mqfs_osd), M_PRISON, M_WAITOK); + rsv = osd_reserve(mqfs_osd_jail_slot); + TASK_INIT(&mo->mo_task, 0, mqfs_prison_remove_task, mo); + mtx_lock(&pr->pr_mtx); + mo->mo_pr_root = pr->pr_root; + (void)osd_jail_set_reserved(pr, mqfs_osd_jail_slot, rsv, mo); + mtx_unlock(&pr->pr_mtx); + return (0); +} + +/* + * Queue the task for after jail/OSD locks are released + */ +static void +mqfs_prison_destructor(void *data) +{ + struct mqfs_osd *mo = data; + + if (mqfs_osd_jail_slot != 0) + taskqueue_enqueue(taskqueue_thread, &mo->mo_task); + else + free(mo, M_PRISON); +} + +/* + * See if this prison root is obsolete, and clean up associated queues if it is + */ +static void +mqfs_prison_remove_task(void *context, int pending) +{ + struct mqfs_osd *mo = context; + struct mqfs_node *pn, *tpn; + const struct prison *pr; + const void *pr_root; + int found; + + pr_root = mo->mo_pr_root; + found = 0; + sx_slock(&allprison_lock); + TAILQ_FOREACH(pr, &allprison, pr_list) { + if (pr->pr_root == pr_root) + found = 1; + } + sx_sunlock(&allprison_lock); + if (!found) { + /* + * No jails are rooted in this directory anymore, + * so no queues should be either. + */ + sx_xlock(&mqfs_data.mi_lock); + LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children, + mn_sibling, tpn) { + if (pn->mn_pr_root == pr_root) + (void)do_unlink(pn, curthread->td_ucred); + } + sx_xunlock(&mqfs_data.mi_lock); + } + free(mo, M_PRISON); +} + + +/* * Allocate a message queue */ static struct mqueue * @@ -1982,7 +2097,7 @@ return (error); sx_xlock(&mqfs_data.mi_lock); - pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1); + pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred); if (pn == NULL) { if (!(flags & O_CREAT)) { error = ENOENT; @@ -2077,7 +2192,7 @@ return (EINVAL); sx_xlock(&mqfs_data.mi_lock); - pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1); + pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred); if (pn != NULL) error = do_unlink(pn, td->td_ucred); else --- sys/kern/uipc_sem.c.orig +++ sys/kern/uipc_sem.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -444,12 +445,24 @@ static void ksem_info_impl(struct ksem *ks, char *path, size_t size, uint32_t *value) { + const char *ks_path, *pr_path; + size_t pr_pathlen; if (ks->ks_path == NULL) return; sx_slock(&ksem_dict_lock); - if (ks->ks_path != NULL) - strlcpy(path, ks->ks_path, size); + ks_path = ks->ks_path; + if (ks_path != NULL) { + pr_path = curthread->td_ucred->cr_prison->pr_path; + if (strcmp(pr_path, "/") != 0) { + /* Return the jail-rooted pathname. */ + pr_pathlen = strlen(pr_path); + if (strncmp(ks_path, pr_path, pr_pathlen) == 0 && + ks_path[pr_pathlen] == '/') + ks_path += pr_pathlen; + } + strlcpy(path, ks_path, size); + } if (value != NULL) *value = ks->ks_value; sx_sunlock(&ksem_dict_lock); @@ -493,6 +506,8 @@ struct ksem *ks; struct file *fp; char *path; + const char *pr_path; + size_t pr_pathlen; Fnv32_t fnv; int error, fd; @@ -529,10 +544,16 @@ ks->ks_flags |= KS_ANONYMOUS; } else { path = malloc(MAXPATHLEN, M_KSEM, M_WAITOK); - error = copyinstr(name, path, MAXPATHLEN, NULL); + pr_path = td->td_ucred->cr_prison->pr_path; + /* Construct a full pathname for jailed callers. */ + pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 + : strlcpy(path, pr_path, MAXPATHLEN); + error = copyinstr(name, path + pr_pathlen, + MAXPATHLEN - pr_pathlen, NULL); + /* Require paths to start with a '/' character. */ - if (error == 0 && path[0] != '/') + if (error == 0 && path[pr_pathlen] != '/') error = EINVAL; if (error) { fdclose(fdp, fp, fd, td); @@ -668,11 +689,17 @@ sys_ksem_unlink(struct thread *td, struct ksem_unlink_args *uap) { char *path; + const char *pr_path; + size_t pr_pathlen; Fnv32_t fnv; int error; path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); - error = copyinstr(uap->name, path, MAXPATHLEN, NULL); + pr_path = td->td_ucred->cr_prison->pr_path; + pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 + : strlcpy(path, pr_path, MAXPATHLEN); + error = copyinstr(uap->name, path + pr_pathlen, MAXPATHLEN - pr_pathlen, + NULL); if (error) { free(path, M_TEMP); return (error); --- sys/kern/uipc_shm.c.orig +++ sys/kern/uipc_shm.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -711,6 +712,8 @@ struct shmfd *shmfd; struct file *fp; char *path; + const char *pr_path; + size_t pr_pathlen; Fnv32_t fnv; mode_t cmode; int fd, error; @@ -748,13 +751,19 @@ shmfd = shm_alloc(td->td_ucred, cmode); } else { path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK); - error = copyinstr(uap->path, path, MAXPATHLEN, NULL); + pr_path = td->td_ucred->cr_prison->pr_path; + + /* Construct a full pathname for jailed callers. */ + pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 + : strlcpy(path, pr_path, MAXPATHLEN); + error = copyinstr(uap->path, path + pr_pathlen, + MAXPATHLEN - pr_pathlen, NULL); #ifdef KTRACE if (error == 0 && KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(path); #endif /* Require paths to start with a '/' character. */ - if (error == 0 && path[0] != '/') + if (error == 0 && path[pr_pathlen] != '/') error = EINVAL; if (error) { fdclose(fdp, fp, fd, td); @@ -841,11 +850,17 @@ sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap) { char *path; + const char *pr_path; + size_t pr_pathlen; Fnv32_t fnv; int error; path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); - error = copyinstr(uap->path, path, MAXPATHLEN, NULL); + pr_path = td->td_ucred->cr_prison->pr_path; + pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 + : strlcpy(path, pr_path, MAXPATHLEN); + error = copyinstr(uap->path, path + pr_pathlen, MAXPATHLEN - pr_pathlen, + NULL); if (error) { free(path, M_TEMP); return (error); @@ -1052,11 +1067,23 @@ void shm_path(struct shmfd *shmfd, char *path, size_t size) { + const char *shm_path, *pr_path; + size_t pr_pathlen; if (shmfd->shm_path == NULL) return; sx_slock(&shm_dict_lock); - if (shmfd->shm_path != NULL) - strlcpy(path, shmfd->shm_path, size); + shm_path = shmfd->shm_path; + if (shm_path != NULL) { + pr_path = curthread->td_ucred->cr_prison->pr_path; + if (strcmp(pr_path, "/") != 0) { + /* Return the jail-rooted pathname. */ + pr_pathlen = strlen(pr_path); + if (strncmp(shm_path, pr_path, pr_pathlen) == 0 && + shm_path[pr_pathlen] == '/') + shm_path += pr_pathlen; + } + strlcpy(path, shm_path, size); + } sx_sunlock(&shm_dict_lock); } --- sys/sys/osd.h.orig +++ sys/sys/osd.h @@ -59,6 +59,10 @@ void osd_deregister(u_int type, u_int slot); int osd_set(u_int type, struct osd *osd, u_int slot, void *value); +void *osd_reserve(u_int slot); +int osd_set_reserved(u_int type, struct osd *osd, u_int slot, void *rsv, + void *value); +void osd_free_reserved(void *rsv); void *osd_get(u_int type, struct osd *osd, u_int slot); void osd_del(u_int type, struct osd *osd, u_int slot); int osd_call(u_int type, u_int method, void *obj, void *data); @@ -71,6 +75,8 @@ osd_deregister(OSD_THREAD, (slot)) #define osd_thread_set(td, slot, value) \ osd_set(OSD_THREAD, &(td)->td_osd, (slot), (value)) +#define osd_thread_set_reserved(td, slot, rsv, value) \ + osd_set_reserved(OSD_THREAD, &(td)->td_osd, (slot), (rsv), (value)) #define osd_thread_get(td, slot) \ osd_get(OSD_THREAD, &(td)->td_osd, (slot)) #define osd_thread_del(td, slot) do { \ @@ -88,6 +94,8 @@ osd_deregister(OSD_JAIL, (slot)) #define osd_jail_set(pr, slot, value) \ osd_set(OSD_JAIL, &(pr)->pr_osd, (slot), (value)) +#define osd_jail_set_reserved(pr, slot, rsv, value) \ + osd_set_reserved(OSD_JAIL, &(pr)->pr_osd, (slot), (rsv), (value)) #define osd_jail_get(pr, slot) \ osd_get(OSD_JAIL, &(pr)->pr_osd, (slot)) #define osd_jail_del(pr, slot) \