prepare_kernel_cred(), commit_creds()

msh1307·2023년 1월 26일
0

Kernel

목록 보기
2/9

Task

리눅스에서 task는 프로그램의 실행 단위를 나타낸다.
커널에서 프로세스, 쓰레드를 모두 task로 본다.
PCB, TCB도 다 task_struct로 구현되어 있다고 한다.
각 task는 커널 메모리에 task_struct 구조체로 표현된다.
task_struct에는 사용자 신원 정보도 존재한다.
그 신원 정보를 조작해서 root 권한을 획득하는게 커널 익스플로잇의 주된 목표라고 볼 수 있고 그걸 LPE(Local Privilege Escalation)라고 한다.

/*
 * struct task_struct - in-kernel descriptor of a single task.
 *
 * Member groups that appear below, in order: scheduler state and
 * priorities, RCU bookkeeping, memory-management pointers (mm, active_mm),
 * exit/signal state, one-bit status flags, pid/tgid and parent/child links,
 * CPU-time accounting, the process credentials (ptracer_cred, real_cred,
 * cred), filesystem/file/namespace/signal pointers, locking and lockdep
 * state, and a long tail of members that only exist when the matching
 * CONFIG_* option is enabled.
 *
 * Everything between randomized_struct_fields_start and
 * randomized_struct_fields_end belongs to the randomizable portion of the
 * structure; 'thread' has to stay the very last member (see the warning at
 * the end of the definition).
 */
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
    /*
     * For reasons of header soup (see current_thread_info()), this
     * must be the first element of task_struct.
     */
    struct thread_info        thread_info;
#endif
    /* -1 unrunnable, 0 runnable, >0 stopped: */
    volatile long            state;

    /*
     * This begins the randomizable portion of task_struct. Only
     * scheduling-critical items should be added above here.
     */
    randomized_struct_fields_start

    void                *stack;
    refcount_t            usage;
    /* Per task flags (PF_*), defined further below: */
    unsigned int            flags;
    unsigned int            ptrace;

#ifdef CONFIG_SMP
    int                on_cpu;
    struct __call_single_node    wake_entry;
#ifdef CONFIG_THREAD_INFO_IN_TASK
    /* Current CPU: */
    unsigned int            cpu;
#endif
    unsigned int            wakee_flips;
    unsigned long            wakee_flip_decay_ts;
    struct task_struct        *last_wakee;

    /*
     * recent_used_cpu is initially set as the last CPU used by a task
     * that wakes affine another task. Waker/wakee relationships can
     * push tasks around a CPU where each wakeup moves to the next one.
     * Tracking a recently used CPU allows a quick search for a recently
     * used CPU that may be idle.
     */
    int                recent_used_cpu;
    int                wake_cpu;
#endif
    int                on_rq;

    int                prio;
    int                static_prio;
    int                normal_prio;
    unsigned int            rt_priority;

    const struct sched_class    *sched_class;
    struct sched_entity        se;
    struct sched_rt_entity        rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group        *sched_task_group;
#endif
    struct sched_dl_entity        dl;

#ifdef CONFIG_UCLAMP_TASK
    /*
     * Clamp values requested for a scheduling entity.
     * Must be updated with task_rq_lock() held.
     */
    struct uclamp_se        uclamp_req[UCLAMP_CNT];
    /*
     * Effective clamp values used for a scheduling entity.
     * Must be updated with task_rq_lock() held.
     */
    struct uclamp_se        uclamp[UCLAMP_CNT];
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* List of struct preempt_notifier: */
    struct hlist_head        preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int            btrace_seq;
#endif

    unsigned int            policy;
    int                nr_cpus_allowed;
    const cpumask_t            *cpus_ptr;
    cpumask_t            cpus_mask;
    void                *migration_pending;
#ifdef CONFIG_SMP
    unsigned short            migration_disabled;
#endif
    unsigned short            migration_flags;

#ifdef CONFIG_PREEMPT_RCU
    int                rcu_read_lock_nesting;
    union rcu_special        rcu_read_unlock_special;
    struct list_head        rcu_node_entry;
    struct rcu_node            *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_TASKS_RCU
    unsigned long            rcu_tasks_nvcsw;
    u8                rcu_tasks_holdout;
    u8                rcu_tasks_idx;
    int                rcu_tasks_idle_cpu;
    struct list_head        rcu_tasks_holdout_list;
#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_TASKS_TRACE_RCU
    int                trc_reader_nesting;
    int                trc_ipi_to_cpu;
    union rcu_special        trc_reader_special;
    bool                trc_reader_checked;
    struct list_head        trc_holdout_list;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */

    struct sched_info        sched_info;

    struct list_head        tasks;
#ifdef CONFIG_SMP
    struct plist_node        pushable_tasks;
    struct rb_node            pushable_dl_tasks;
#endif

    struct mm_struct        *mm;
    struct mm_struct        *active_mm;

    /* Per-thread vma caching: */
    struct vmacache            vmacache;

#ifdef SPLIT_RSS_COUNTING
    struct task_rss_stat        rss_stat;
#endif
    int                exit_state;
    int                exit_code;
    int                exit_signal;
    /* The signal sent when the parent dies: */
    int                pdeath_signal;
    /* JOBCTL_*, siglock protected: */
    unsigned long            jobctl;

    /* Used for emulating ABI behavior of previous Linux versions: */
    unsigned int            personality;

    /* Scheduler bits, serialized by scheduler locks: */
    unsigned            sched_reset_on_fork:1;
    unsigned            sched_contributes_to_load:1;
    unsigned            sched_migrated:1;
#ifdef CONFIG_PSI
    unsigned            sched_psi_wake_requeue:1;
#endif

    /* Force alignment to the next boundary: */
    unsigned            :0;

    /* Unserialized, strictly 'current' */

    /*
     * This field must not be in the scheduler word above due to wakelist
     * queueing no longer being serialized by p->on_cpu. However:
     *
     * p->XXX = X;            ttwu()
     * schedule()              if (p->on_rq && ..) // false
     *   smp_mb__after_spinlock();      if (smp_load_acquire(&p->on_cpu) && //true
     *   deactivate_task()              ttwu_queue_wakelist())
     *     p->on_rq = 0;            p->sched_remote_wakeup = Y;
     *
     * guarantees all stores of 'current' are visible before
     * ->sched_remote_wakeup gets used, so it can be in this word.
     */
    unsigned            sched_remote_wakeup:1;

    /* Bit to tell LSMs we're in execve(): */
    unsigned            in_execve:1;
    unsigned            in_iowait:1;
#ifndef TIF_RESTORE_SIGMASK
    unsigned            restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
    unsigned            in_user_fault:1;
#endif
#ifdef CONFIG_COMPAT_BRK
    unsigned            brk_randomized:1;
#endif
#ifdef CONFIG_CGROUPS
    /* disallow userland-initiated cgroup migration */
    unsigned            no_cgroup_migration:1;
    /* task is frozen/stopped (used by the cgroup freezer) */
    unsigned            frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
    unsigned            use_memdelay:1;
#endif
#ifdef CONFIG_PSI
    /* Stalled due to lack of memory */
    unsigned            in_memstall:1;
#endif
#ifdef CONFIG_PAGE_OWNER
    /* Used by page_owner=on to detect recursion in page tracking. */
    unsigned            in_page_owner:1;
#endif

    unsigned long            atomic_flags; /* Flags requiring atomic access. */

    struct restart_block        restart_block;

    pid_t                pid;
    pid_t                tgid;

#ifdef CONFIG_STACKPROTECTOR
    /* Canary value for the -fstack-protector GCC feature: */
    unsigned long            stack_canary;
#endif
    /*
     * Pointers to the (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */

    /* Real parent process: */
    struct task_struct __rcu    *real_parent;

    /* Recipient of SIGCHLD, wait4() reports: */
    struct task_struct __rcu    *parent;

    /*
     * Children/sibling form the list of natural children:
     */
    struct list_head        children;
    struct list_head        sibling;
    struct task_struct        *group_leader;

    /*
     * 'ptraced' is the list of tasks this task is using ptrace() on.
     *
     * This includes both natural children and PTRACE_ATTACH targets.
     * 'ptrace_entry' is this task's link on the p->parent->ptraced list.
     */
    struct list_head        ptraced;
    struct list_head        ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid            *thread_pid;
    struct hlist_node        pid_links[PIDTYPE_MAX];
    struct list_head        thread_group;
    struct list_head        thread_node;

    struct completion        *vfork_done;

    /* CLONE_CHILD_SETTID: */
    int __user            *set_child_tid;

    /* CLONE_CHILD_CLEARTID: */
    int __user            *clear_child_tid;

    /* PF_IO_WORKER */
    void                *pf_io_worker;

    u64                utime;
    u64                stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
    u64                utimescaled;
    u64                stimescaled;
#endif
    u64                gtime;
    struct prev_cputime        prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
    struct vtime            vtime;
#endif

#ifdef CONFIG_NO_HZ_FULL
    atomic_t            tick_dep_mask;
#endif
    /* Context switch counts: */
    unsigned long            nvcsw;
    unsigned long            nivcsw;

    /* Monotonic time in nsecs: */
    u64                start_time;

    /* Boot based time in nsecs: */
    u64                start_boottime;

    /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */
    unsigned long            min_flt;
    unsigned long            maj_flt;

    /* Empty if CONFIG_POSIX_CPUTIMERS=n */
    struct posix_cputimers        posix_cputimers;

#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
    struct posix_cputimers_work    posix_cputimers_work;
#endif

    /* Process credentials: */

    /* Tracer's credentials at attach: */
    const struct cred __rcu        *ptracer_cred;

    /* Objective and real subjective task credentials (COW): */
    const struct cred __rcu        *real_cred;

    /* Effective (overridable) subjective task credentials (COW): */
    const struct cred __rcu        *cred;

#ifdef CONFIG_KEYS
    /* Cached requested key. */
    struct key            *cached_requested_key;
#endif

    /*
     * executable name, excluding path.
     *
     * - normally initialized setup_new_exec()
     * - access it with [gs]et_task_comm()
     * - lock it with task_lock()
     */
    char                comm[TASK_COMM_LEN];

    struct nameidata        *nameidata;

#ifdef CONFIG_SYSVIPC
    struct sysv_sem            sysvsem;
    struct sysv_shm            sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
    unsigned long            last_switch_count;
    unsigned long            last_switch_time;
#endif
    /* Filesystem information: */
    struct fs_struct        *fs;

    /* Open file information: */
    struct files_struct        *files;

#ifdef CONFIG_IO_URING
    struct io_uring_task        *io_uring;
#endif

    /* Namespaces: */
    struct nsproxy            *nsproxy;

    /* Signal handlers: */
    struct signal_struct        *signal;
    struct sighand_struct __rcu        *sighand;
    struct sigqueue            *sigqueue_cache;
    sigset_t            blocked;
    sigset_t            real_blocked;
    /* Restored if set_restore_sigmask() was used: */
    sigset_t            saved_sigmask;
    struct sigpending        pending;
    unsigned long            sas_ss_sp;
    size_t                sas_ss_size;
    unsigned int            sas_ss_flags;

    struct callback_head        *task_works;

#ifdef CONFIG_AUDIT
#ifdef CONFIG_AUDITSYSCALL
    struct audit_context        *audit_context;
#endif
    kuid_t                loginuid;
    unsigned int            sessionid;
#endif
    struct seccomp            seccomp;
    struct syscall_user_dispatch    syscall_dispatch;

    /* Thread group tracking: */
    u64                parent_exec_id;
    u64                self_exec_id;

    /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */
    spinlock_t            alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t            pi_lock;

    struct wake_q_node        wake_q;

#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task: */
    struct rb_root_cached        pi_waiters;
    /* Updated under owner's pi_lock and rq lock */
    struct task_struct        *pi_top_task;
    /* Deadlock detection and priority inheritance handling: */
    struct rt_mutex_waiter        *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
    /* Mutex deadlock detection: */
    struct mutex_waiter        *blocked_on;
#endif

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
    int                non_block_count;
#endif

#ifdef CONFIG_TRACE_IRQFLAGS
    struct irqtrace_events        irqtrace;
    unsigned int            hardirq_threaded;
    u64                hardirq_chain_key;
    int                softirqs_enabled;
    int                softirq_context;
    int                irq_config;
#endif
#ifdef CONFIG_PREEMPT_RT
    int                softirq_disable_cnt;
#endif

#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH            48UL
    u64                curr_chain_key;
    int                lockdep_depth;
    unsigned int            lockdep_recursion;
    struct held_lock        held_locks[MAX_LOCK_DEPTH];
#endif

#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
    unsigned int            in_ubsan;
#endif

    /* Journalling filesystem info: */
    void                *journal_info;

    /* Stacked block device info: */
    struct bio_list            *bio_list;

#ifdef CONFIG_BLOCK
    /* Stack plugging: */
    struct blk_plug            *plug;
#endif

    /* VM state: */
    struct reclaim_state        *reclaim_state;

    struct backing_dev_info        *backing_dev_info;

    struct io_context        *io_context;

#ifdef CONFIG_COMPACTION
    struct capture_control        *capture_control;
#endif
    /* Ptrace state: */
    unsigned long            ptrace_message;
    kernel_siginfo_t        *last_siginfo;

    struct task_io_accounting    ioac;
#ifdef CONFIG_PSI
    /* Pressure stall state */
    unsigned int            psi_flags;
#endif
#ifdef CONFIG_TASK_XACCT
    /* Accumulated RSS usage: */
    u64                acct_rss_mem1;
    /* Accumulated virtual memory usage: */
    u64                acct_vm_mem1;
    /* stime + utime since last update: */
    u64                acct_timexpd;
#endif
#ifdef CONFIG_CPUSETS
    /* Protected by ->alloc_lock: */
    nodemask_t            mems_allowed;
    /* Sequence number to catch updates: */
    seqcount_spinlock_t        mems_allowed_seq;
    int                cpuset_mem_spread_rotor;
    int                cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock: */
    struct css_set __rcu        *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock: */
    struct list_head        cg_list;
#endif
#ifdef CONFIG_X86_CPU_RESCTRL
    u32                closid;
    u32                rmid;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user    *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head        pi_state_list;
    struct futex_pi_state        *pi_state_cache;
    struct mutex            futex_exit_mutex;
    unsigned int            futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context    *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex            perf_event_mutex;
    struct list_head        perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
    unsigned long            preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
    /* Protected by alloc_lock: */
    struct mempolicy        *mempolicy;
    short                il_prev;
    short                pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
    int                numa_scan_seq;
    unsigned int            numa_scan_period;
    unsigned int            numa_scan_period_max;
    int                numa_preferred_nid;
    unsigned long            numa_migrate_retry;
    /* Migration stamp: */
    u64                node_stamp;
    u64                last_task_numa_placement;
    u64                last_sum_exec_runtime;
    struct callback_head        numa_work;

    /*
     * This pointer is only modified for current in syscall and
     * pagefault context (and for tasks being destroyed), so it can be read
     * from any of the following contexts:
     *  - RCU read-side critical section
     *  - current->numa_group from everywhere
     *  - task's runqueue locked, task not running
     */
    struct numa_group __rcu        *numa_group;

    /*
     * numa_faults is an array split into four regions:
     * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
     * in this precise order.
     *
     * faults_memory: Exponential decaying average of faults on a per-node
     * basis. Scheduling placement decisions are made based on these
     * counts. The values remain static for the duration of a PTE scan.
     * faults_cpu: Track the nodes the process was running on when a NUMA
     * hinting fault was incurred.
     * faults_memory_buffer and faults_cpu_buffer: Record faults per node
     * during the current scan window. When the scan completes, the counts
     * in faults_memory and faults_cpu decay and these values are copied.
     */
    unsigned long            *numa_faults;
    unsigned long            total_numa_faults;

    /*
     * numa_faults_locality tracks if faults recorded during the last
     * scan window were remote/local or failed to migrate. The task scan
     * period is adapted based on the locality of the faults with different
     * weights depending on whether they were shared or private faults
     */
    unsigned long            numa_faults_locality[3];

    unsigned long            numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_RSEQ
    struct rseq __user *rseq;
    u32 rseq_sig;
    /*
     * RmW on rseq_event_mask must be performed atomically
     * with respect to preemption.
     */
    unsigned long rseq_event_mask;
#endif

    struct tlbflush_unmap_batch    tlb_ubc;

    union {
        refcount_t        rcu_users;
        struct rcu_head        rcu;
    };

    /* Cache last used pipe for splice(): */
    struct pipe_inode_info        *splice_pipe;

    struct page_frag        task_frag;

#ifdef CONFIG_TASK_DELAY_ACCT
    struct task_delay_info        *delays;
#endif

#ifdef CONFIG_FAULT_INJECTION
    int                make_it_fail;
    unsigned int            fail_nth;
#endif
    /*
     * When (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for a dirty throttling pause:
     */
    int                nr_dirtied;
    int                nr_dirtied_pause;
    /* Start of a write-and-pause period: */
    unsigned long            dirty_paused_when;

#ifdef CONFIG_LATENCYTOP
    int                latency_record_count;
    struct latency_record        latency_record[LT_SAVECOUNT];
#endif
    /*
     * Time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    u64                timer_slack_ns;
    u64                default_timer_slack_ns;

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
    unsigned int            kasan_depth;
#endif

#ifdef CONFIG_KCSAN
    struct kcsan_ctx        kcsan_ctx;
#ifdef CONFIG_TRACE_IRQFLAGS
    struct irqtrace_events        kcsan_save_irqtrace;
#endif
#endif

#if IS_ENABLED(CONFIG_KUNIT)
    struct kunit            *kunit_test;
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack: */
    int                curr_ret_stack;
    int                curr_ret_depth;

    /* Stack of return addresses for return function tracing: */
    struct ftrace_ret_stack        *ret_stack;

    /* Timestamp for last schedule: */
    unsigned long long        ftrace_timestamp;

    /*
     * Number of functions that haven't been traced
     * because of depth overrun:
     */
    atomic_t            trace_overrun;

    /* Pause tracing: */
    atomic_t            tracing_graph_pause;
#endif

#ifdef CONFIG_TRACING
    /* State flags for use by tracers: */
    unsigned long            trace;

    /* Bitmask and counter of trace recursion: */
    unsigned long            trace_recursion;
#endif /* CONFIG_TRACING */

#ifdef CONFIG_KCOV
    /* See kernel/kcov.c for more details. */

    /* Coverage collection mode enabled for this task (0 if disabled): */
    unsigned int            kcov_mode;

    /* Size of the kcov_area: */
    unsigned int            kcov_size;

    /* Buffer for coverage collection: */
    void                *kcov_area;

    /* KCOV descriptor wired with this task or NULL: */
    struct kcov            *kcov;

    /* KCOV common handle for remote coverage collection: */
    u64                kcov_handle;

    /* KCOV sequence number: */
    int                kcov_sequence;

    /* Collect coverage from softirq context: */
    unsigned int            kcov_softirq;
#endif

#ifdef CONFIG_MEMCG
    struct mem_cgroup        *memcg_in_oom;
    gfp_t                memcg_oom_gfp_mask;
    int                memcg_oom_order;

    /* Number of pages to reclaim on returning to userland: */
    unsigned int            memcg_nr_pages_over_high;

    /* Used by memcontrol for targeted memcg charge: */
    struct mem_cgroup        *active_memcg;
#endif

#ifdef CONFIG_BLK_CGROUP
    struct request_queue        *throttle_queue;
#endif

#ifdef CONFIG_UPROBES
    struct uprobe_task        *utask;
#endif
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
    unsigned int            sequential_io;
    unsigned int            sequential_io_avg;
#endif
    struct kmap_ctrl        kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
    unsigned long            task_state_change;
#endif
    int                pagefault_disabled;
#ifdef CONFIG_MMU
    struct task_struct        *oom_reaper_list;
#endif
#ifdef CONFIG_VMAP_STACK
    struct vm_struct        *stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
    /* A live task holds one reference: */
    refcount_t            stack_refcount;
#endif
#ifdef CONFIG_LIVEPATCH
    int patch_state;
#endif
#ifdef CONFIG_SECURITY
    /* Used by LSM modules for access restriction: */
    void                *security;
#endif
#ifdef CONFIG_BPF_SYSCALL
    /* Used by BPF task local storage */
    struct bpf_local_storage __rcu    *bpf_storage;
#endif

#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
    unsigned long            lowest_stack;
    unsigned long            prev_lowest_stack;
#endif

#ifdef CONFIG_X86_MCE
    void __user            *mce_vaddr;
    __u64                mce_kflags;
    u64                mce_addr;
    __u64                mce_ripv : 1,
                    mce_whole_page : 1,
                    __mce_reserved : 62;
    struct callback_head        mce_kill_me;
#endif

#ifdef CONFIG_KRETPROBES
    struct llist_head               kretprobe_instances;
#endif

    /*
     * New fields for task_struct should be added above here, so that
     * they are included in the randomized portion of task_struct.
     */
    randomized_struct_fields_end

    /* CPU-specific state of this task: */
    struct thread_struct        thread;

    /*
     * WARNING: on x86, 'thread_struct' contains a variable-sized
     * structure.  It *MUST* be at the end of 'task_struct'.
     *
     * Do not put anything below here!
     */
};

여기서 중요한건 cred 구조체이다.

/*
 * struct cred - one set of credentials.
 *
 * Holds a reference count (usage), the real / saved / effective /
 * filesystem UID and GID values, securebits, five capability sets,
 * keyring pointers when CONFIG_KEYS is set, an LSM pointer when
 * CONFIG_SECURITY is set, pointers to the associated user_struct,
 * user_namespace and group_info, and an rcu_head used for RCU deletion.
 * With CONFIG_DEBUG_CREDENTIALS the structure additionally carries a
 * subscriber count, the address of the last put and a magic number.
 */
struct cred {
    atomic_t    usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
    atomic_t    subscribers;    /* number of processes subscribed */
    void        *put_addr;
    unsigned    magic;
#define CRED_MAGIC    0x43736564
#define CRED_MAGIC_DEAD    0x44656144
#endif
    kuid_t        uid;        /* real UID of the task */
    kgid_t        gid;        /* real GID of the task */
    kuid_t        suid;        /* saved UID of the task */
    kgid_t        sgid;        /* saved GID of the task */
    kuid_t        euid;        /* effective UID of the task */
    kgid_t        egid;        /* effective GID of the task */
    kuid_t        fsuid;        /* UID for VFS ops */
    kgid_t        fsgid;        /* GID for VFS ops */
    unsigned    securebits;    /* SUID-less security management */
    kernel_cap_t    cap_inheritable; /* caps our children can inherit */
    kernel_cap_t    cap_permitted;    /* caps we're permitted */
    kernel_cap_t    cap_effective;    /* caps we can actually use */
    kernel_cap_t    cap_bset;    /* capability bounding set */
    kernel_cap_t    cap_ambient;    /* Ambient capability set */
#ifdef CONFIG_KEYS
    unsigned char    jit_keyring;    /* default keyring to attach requested
                     * keys to */
    struct key    *session_keyring; /* keyring inherited over fork */
    struct key    *process_keyring; /* keyring private to this process */
    struct key    *thread_keyring; /* keyring private to this thread */
    struct key    *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
    void        *security;    /* LSM security */
#endif
    struct user_struct *user;    /* real user ID subscription */
    struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
    struct group_info *group_info;    /* supplementary groups for euid/fsgid */
    /* RCU deletion */
    union {
        int non_rcu;            /* Can we skip RCU deletion? */
        struct rcu_head    rcu;        /* RCU deletion hook */
    };
} __randomize_layout;

uid는 프로세스를 소유하고 있는 user의 id를 말한다.
uid가 0으로 덮이면, seteuid(0)으로 root 권한을 얻을 수 있다.
euid는 effective user id 이다.
gid, egid도 같은 맥락이다.

prepare_kernel_cred()

커널 서비스에 대한 자격증명을 준비한다.

/*
 * prepare_kernel_cred - build a fresh set of credentials for a kernel service
 * @daemon: task whose credentials serve as the template, or NULL to use
 *          init_cred as the template
 *
 * Allocates a cred from cred_jar, copies the template into it, resets the
 * usage and subscriber counts, clears the keyring and LSM pointers and then
 * calls security_prepare_creds() on the copy.  The reference taken on the
 * template is dropped on every path that got as far as taking it.
 *
 * Returns the new credentials, or NULL if the allocation or
 * security_prepare_creds() failed.
 */
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
    const struct cred *old;
    struct cred *new = kmem_cache_alloc(cred_jar, GFP_KERNEL);

    if (new == NULL)
        return NULL;

    kdebug("prepare_kernel_cred() alloc %p", new);

    /* Template selection: the daemon's creds when given, init_cred otherwise. */
    old = daemon ? get_task_cred(daemon) : get_cred(&init_cred);

    validate_creds(old);

    /* Start from a plain struct copy, then fix up the per-instance state. */
    *new = *old;
    atomic_set(&new->usage, 1);
    set_cred_subscribers(new, 0);
    get_uid(new->user);
    get_user_ns(new->user_ns);
    get_group_info(new->group_info);

#ifdef CONFIG_KEYS
    /* The copy does not keep any of the template's keyrings. */
    new->session_keyring = NULL;
    new->process_keyring = NULL;
    new->thread_keyring = NULL;
    new->request_key_auth = NULL;
    new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
#endif

#ifdef CONFIG_SECURITY
    new->security = NULL;
#endif
    if (security_prepare_creds(new, old, GFP_KERNEL) >= 0) {
        put_cred(old);
        validate_creds(new);
        return new;
    }

    /* security_prepare_creds() failed: drop the copy and the template ref. */
    put_cred(new);
    put_cred(old);
    return NULL;
}
EXPORT_SYMBOL(prepare_kernel_cred);

daemon에 따라 init_cred를 가져올지, daemon의 cred를 가져올건지 선택한다.
그리고 검증해준다.

/*
 * creds_are_invalid - report whether a credential's magic number is wrong
 * @cred: credentials to inspect
 *
 * Returns true when cred->magic differs from CRED_MAGIC, false otherwise.
 */
bool creds_are_invalid(const struct cred *cred)
{
	return cred->magic != CRED_MAGIC;
}
EXPORT_SYMBOL(creds_are_invalid);

매크로 따라가보니까 그냥 magic 검사해주는 로직이다.

그리고 security_prepare_creds()를 호출해서, LSM(보안 모듈)이 old를 바탕으로 새 자격증명(new)의 보안 정보를 준비하게 한다. 이 단계에서는 아직 프로세스의 자격증명이 바뀌지 않는다.

/*
 * security_prepare_creds - hand credential preparation to the security ops
 * @new: credentials being prepared
 * @old: credentials they were derived from
 * @gfp: allocation flags passed through to the hook
 *
 * Returns whatever security_ops->cred_prepare() returns.
 */
int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
{
    int rc = security_ops->cred_prepare(new, old, gfp);

    return rc;
}

put_cred()로 앞에서 get_task_cred()/get_cred()로 얻어 둔 old의 참조를 반납한다. 참조 카운트가 0이 되면 아래의 __put_cred()가 호출되어 자격증명이 파괴된다.

/**
 * __put_cred - Destroy a set of credentials
 * @cred: The record to release
 *
 * Destroy a set of credentials on which no references remain.
 *
 * The function asserts (BUG_ON) that the usage count is zero and that
 * @cred is neither the cred nor the real_cred of the current task, then
 * hands the record to put_cred_rcu() - directly when @cred->non_rcu is
 * set, via call_rcu() otherwise.
 */
void __put_cred(struct cred *cred)
{
	kdebug("__put_cred(%p{%d,%d})", cred,
	       atomic_read(&cred->usage),
	       read_cred_subscribers(cred));

	/* Only a credential with no remaining references may be destroyed. */
	BUG_ON(atomic_read(&cred->usage) != 0);
#ifdef CONFIG_DEBUG_CREDENTIALS
	BUG_ON(read_cred_subscribers(cred) != 0);
	/* Mark the record dead and remember who released it. */
	cred->magic = CRED_MAGIC_DEAD;
	cred->put_addr = __builtin_return_address(0);
#endif
	/* Credentials still installed on the current task must never get here. */
	BUG_ON(cred == current->cred);
	BUG_ON(cred == current->real_cred);

	/* non_rcu set: call put_cred_rcu() right away; otherwise defer via call_rcu(). */
	if (cred->non_rcu)
		put_cred_rcu(&cred->rcu);
	else
		call_rcu(&cred->rcu, put_cred_rcu);
}
EXPORT_SYMBOL(__put_cred);

daemon을 0(NULL)으로 주면 init_cred 구조체를 복사한 자격증명이 만들어진다.
이때 init_cred 구조체를 확인해보면 다음과 같다.

/*
 * init_cred - statically initialised credential set.
 *
 * Every UID/GID member is the global root ID, and the permitted, effective
 * and bounding capability sets are full (the inheritable set is empty).
 * prepare_kernel_cred() uses this as its template when called with a NULL
 * daemon.
 */
struct cred init_cred = {
    .usage          = ATOMIC_INIT(4),
#ifdef CONFIG_DEBUG_CREDENTIALS
    .subscribers        = ATOMIC_INIT(2),
    .magic          = CRED_MAGIC,
#endif
    .uid            = GLOBAL_ROOT_UID,
    .gid            = GLOBAL_ROOT_GID,
    .suid           = GLOBAL_ROOT_UID,
    .sgid           = GLOBAL_ROOT_GID,
    .euid           = GLOBAL_ROOT_UID,
    .egid           = GLOBAL_ROOT_GID,
    .fsuid          = GLOBAL_ROOT_UID,
    .fsgid          = GLOBAL_ROOT_GID,
    .securebits     = SECUREBITS_DEFAULT,
    .cap_inheritable    = CAP_EMPTY_SET,
    .cap_permitted      = CAP_FULL_SET,
    .cap_effective      = CAP_FULL_SET,
    .cap_bset       = CAP_FULL_SET,
    .user           = INIT_USER,
    .user_ns        = &init_user_ns,
    .group_info     = &init_groups,
};

즉 daemon을 0으로 주면, root 권한의 자격증명을 준비할 수 있다.

commit_creds()

commit_creds는 새로운 자격증명을 설치한다.

/*
 * commit_creds - install a new set of credentials on the current task
 * @new: credentials to install; its usage count must be at least 1
 *
 * Points both current->real_cred and current->cred at @new.  One extra
 * reference is taken on @new for the second pointer, and the two references
 * the task held on its previous credentials are dropped at the end.  On the
 * way the function updates the dumpable setting and pdeath_signal, notifies
 * the key code about fsuid/fsgid changes, moves the per-user process count
 * from the old user to the new one and emits proc connector UID/GID events
 * when the corresponding IDs differ.
 *
 * Always returns 0.
 */
int commit_creds(struct cred *new)
{
    struct task_struct *task = current;
    const struct cred *old = task->real_cred;
 
    kdebug("commit_creds(%p{%d,%d})", new,
           atomic_read(&new->usage),
           read_cred_subscribers(new));
 
    /* real_cred and cred must currently refer to the same object. */
    BUG_ON(task->cred != old);
#ifdef CONFIG_DEBUG_CREDENTIALS
    BUG_ON(read_cred_subscribers(old) < 2);
    validate_creds(old);
    validate_creds(new);
#endif
    BUG_ON(atomic_read(&new->usage) < 1);
 
    get_cred(new); /* we will require a ref for the subj creds too */
 
    /* dumpability changes */
    if (!uid_eq(old->euid, new->euid) ||
        !gid_eq(old->egid, new->egid) ||
        !uid_eq(old->fsuid, new->fsuid) ||
        !gid_eq(old->fsgid, new->fsgid) ||
        !cred_cap_issubset(old, new)) {
        if (task->mm)
            set_dumpable(task->mm, suid_dumpable);
        task->pdeath_signal = 0;
        smp_wmb();
    }
 
    /* alter the thread keyring */
    if (!uid_eq(new->fsuid, old->fsuid))
        key_fsuid_changed(task);
    if (!gid_eq(new->fsgid, old->fsgid))
        key_fsgid_changed(task);
 
    /* do it
     * RLIMIT_NPROC limits on user->processes have already been checked
     * in set_user().
     */
    alter_cred_subscribers(new, 2);
    if (new->user != old->user)
        atomic_inc(&new->user->processes);
    rcu_assign_pointer(task->real_cred, new);
    rcu_assign_pointer(task->cred, new);
    if (new->user != old->user)
        atomic_dec(&old->user->processes);
    alter_cred_subscribers(old, -2);
 
    /* send notifications */
    if (!uid_eq(new->uid,   old->uid)  ||
        !uid_eq(new->euid,  old->euid) ||
        !uid_eq(new->suid,  old->suid) ||
        !uid_eq(new->fsuid, old->fsuid))
        proc_id_connector(task, PROC_EVENT_UID);
 
    if (!gid_eq(new->gid,   old->gid)  ||
        !gid_eq(new->egid,  old->egid) ||
        !gid_eq(new->sgid,  old->sgid) ||
        !gid_eq(new->fsgid, old->fsgid))
        proc_id_connector(task, PROC_EVENT_GID);
 
    /* release the old obj and subj refs both */
    put_cred(old);
    put_cred(old);
    return 0;
}
EXPORT_SYMBOL(commit_creds);

current를 task에 넣고, task의 real_cred를 old에 넣는다.
그리고 task->cred와 old가 같은 자격증명을 가리키는지 확인하고(다르면 BUG), new->usage에 저장된 값이 1보다 작지 않은지 확인한다.

prepare_kernel_cred()에서 usage는 이미 1로 세팅되어있어서 상관없다.
그리고 get_cred()를 호출해서 usage를 증가시켜준다.

/**
 * get_new_cred - Get a reference on a new set of credentials
 * @cred: The new credentials to reference
 *
 * Increment the usage count of @cred and return @cred.  This helper is
 * defined ahead of get_cred() so that the call made there refers to an
 * already-declared function.
 */
static inline struct cred *get_new_cred(struct cred *cred)
{
	atomic_inc(&cred->usage);
	return cred;
}

/**
 * get_cred - Get a reference on a set of credentials
 * @cred: The credentials to reference
 *
 * Get a reference on the specified set of credentials.  The caller must
 * release the reference.  If %NULL is passed, it is returned with no action.
 *
 * This is used to deal with a committed set of credentials.  Although the
 * pointer is const, this will temporarily discard the const and increment the
 * usage count.  The purpose of this is to attempt to catch at compile time the
 * accidental alteration of a set of credentials that should be considered
 * immutable.
 */
static inline const struct cred *get_cred(const struct cred *cred)
{
	struct cred *nonconst_cred = (struct cred *) cred;
	if (!cred)
		return cred;
	validate_creds(cred);
	/* Clear non_rcu so that __put_cred() takes the call_rcu() path. */
	nonconst_cred->non_rcu = 0;
	return get_new_cred(nonconst_cred);
}

그 다음에 uid_eq(), gid_eq()로 new와 old의 euid, egid, fsuid, fsgid를 비교해준다.
여기서 fsuid는 리눅스에서 파일 시스템 접근 제어 용도로 사용되는 파일 시스템 사용자 ID를 뜻한다.

cred_cap_issubset()를 통해서 new의 capability 집합이 old의 capability 집합의 부분집합인지(사용자 네임스페이스를 고려하여) 확인한다. 위 비교 중 하나라도 어긋나면 dumpable 설정을 바꾸고 pdeath_signal을 0으로 초기화한다.

그리고 다시 uid_eq(), gid_eq() 함수를 이용하여 new와 old의 fsuid, fsgid를 비교한다.
두 값이 다를 경우 key_fsuid_changed(), key_fsgid_changed()를 이용하여 thread keyring의 소유자(uid, gid)를 새 fsuid, fsgid 값으로 갱신한다. (위 commit_creds()에서는 task를 인자로 넘기지만, 아래 코드는 cred를 인자로 받는 다른 커널 버전의 구현이다.)

/*
 * Propagate a changed fsuid to the owner field of the thread keyring.
 */
void key_fsuid_changed(struct cred *new_cred)
{
	/* credentials without a thread keyring need no update */
	if (!new_cred->thread_keyring)
		return;

	/* rewrite the keyring's uid with its semaphore held for writing */
	down_write(&new_cred->thread_keyring->sem);
	new_cred->thread_keyring->uid = new_cred->fsuid;
	up_write(&new_cred->thread_keyring->sem);
}

/*
 * Propagate a changed fsgid to the group field of the thread keyring.
 */
void key_fsgid_changed(struct cred *new_cred)
{
	/* credentials without a thread keyring need no update */
	if (!new_cred->thread_keyring)
		return;

	/* rewrite the keyring's gid with its semaphore held for writing */
	down_write(&new_cred->thread_keyring->sem);
	new_cred->thread_keyring->gid = new_cred->fsgid;
	up_write(&new_cred->thread_keyring->sem);
}

그리고 alter_cred_subscribers()로 new의 subscribers에 2를 더한다.

/*
 * Add @n (which may be negative) to the subscriber count of @_cred.
 * The body is empty unless CONFIG_DEBUG_CREDENTIALS is defined.
 */
static inline void alter_cred_subscribers(const struct cred *_cred, int n)
{
#ifdef CONFIG_DEBUG_CREDENTIALS
	/* the counter is updated through a non-const view of the record */
	atomic_add(n, &((struct cred *) _cred)->subscribers);
#endif
}

rcu_assign_pointer()로 task->real_cred, task->cred에 새로운 자격증명을 등록하고 alter_cred_subscribers()로 old의 subscribers에 -2를 더한다.

마지막으로 put_cred()로 이전 자격증명들을 모두 해제한다.

commit_creds(prepare_kernel_cred(0))

최종적으로 다음 코드를 실행시켜서 권한 상승이 가능하다.

/*
 * A NULL daemon makes prepare_kernel_cred() copy init_cred, whose UID/GID
 * members are all the global root ID; commit_creds() then installs that
 * copy as the credentials of the current task.
 */
commit_creds(prepare_kernel_cred(NULL));
profile
https://msh1307.kr

0개의 댓글