1. ELF
1. Elf32_Ehdr, Elf64_Ehdr

2. ELF 파일의 헤더 부분을 읽어서 ELF 파일이 32bit/64bit 기반인지 파악하고 헤더 일부분 출력해보기
1. 코드
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <elf.h>
/*
 * Print selected fields of a 32-bit ELF file header to stdout.
 *
 * elf_header must point to a completely filled-in Elf32_Ehdr.
 *
 * e_entry, e_shoff and e_phoff are unsigned 32-bit quantities, so they are
 * printed with %u; printing them with %d shows values at or above
 * 0x80000000 as negative numbers. The 8- and 16-bit fields are promoted to
 * int when passed to printf, so %d is correct for them.
 */
void elf32 (Elf32_Ehdr *elf_header) {
    printf("ELF file information:\n");
    printf(" Class: %d\n", elf_header->e_ident[EI_CLASS]);
    printf(" Data: %d\n", elf_header->e_ident[EI_DATA]);
    printf(" Version: %d\n", elf_header->e_ident[EI_VERSION]);
    printf(" OS/ABI: %d\n", elf_header->e_ident[EI_OSABI]);
    printf(" Type: %d\n", elf_header->e_type);
    printf(" Machine: %d\n", elf_header->e_machine);
    printf(" Entry point address: %u\n", (unsigned int)elf_header->e_entry);
    printf(" Section header offset: %u\n", (unsigned int)elf_header->e_shoff);
    printf(" Number of section headers: %d\n", elf_header->e_shnum);
    printf(" Size of section headers: %d\n", elf_header->e_shentsize);
    printf(" Program header offset: %u\n", (unsigned int)elf_header->e_phoff);
    printf(" Number of program headers: %d\n", elf_header->e_phnum);
    printf(" Size of program headers: %d\n", elf_header->e_phentsize);
}
/*
 * Print selected fields of a 64-bit ELF file header to stdout.
 *
 * elf_header must point to a completely filled-in Elf64_Ehdr.
 *
 * e_entry, e_shoff and e_phoff are unsigned 64-bit quantities. Passing them
 * to a %d conversion is a type mismatch (undefined behaviour, and in practice
 * truncated output), so they are widened to unsigned long long and printed
 * with %llu. The 8- and 16-bit fields are promoted to int by the variadic
 * call, so %d is correct for them.
 */
void elf64 (Elf64_Ehdr *elf_header) {
    printf("ELF file information:\n");
    printf(" Class: %d\n", elf_header->e_ident[EI_CLASS]);
    printf(" Data: %d\n", elf_header->e_ident[EI_DATA]);
    printf(" Version: %d\n", elf_header->e_ident[EI_VERSION]);
    printf(" OS/ABI: %d\n", elf_header->e_ident[EI_OSABI]);
    printf(" Type: %d\n", elf_header->e_type);
    printf(" Machine: %d\n", elf_header->e_machine);
    printf(" Entry point address: %llu\n", (unsigned long long)elf_header->e_entry);
    printf(" Section header offset: %llu\n", (unsigned long long)elf_header->e_shoff);
    printf(" Number of section headers: %d\n", elf_header->e_shnum);
    printf(" Size of section headers: %d\n", elf_header->e_shentsize);
    printf(" Program header offset: %llu\n", (unsigned long long)elf_header->e_phoff);
    printf(" Number of program headers: %d\n", elf_header->e_phnum);
    printf(" Size of program headers: %d\n", elf_header->e_phentsize);
}
/*
 * Open the file named by argv[1], decide from its identification bytes
 * whether it is a 32-bit or a 64-bit ELF file, and print part of its header
 * through elf32() or elf64().
 *
 * Returns 0 after printing a header, 1 on any error (bad usage, unreadable
 * file, missing ELF magic, truncated header, unknown class). Diagnostics go
 * to stderr.
 */
int main(int argc, char *argv[]) {
    Elf32_Ehdr elf32_header;
    Elf64_Ehdr elf64_header;
    unsigned char ident[EI_NIDENT];
    int status = 0;

    /* argv[1] is only valid when a path was actually supplied. */
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <elf-file>\n", argc > 0 ? argv[0] : "elfinfo");
        return 1;
    }

    /* ELF files are binary data, so open in binary mode. */
    FILE *fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        fprintf(stderr, "Failed to open file\n");
        return 1;
    }

    /*
     * Only the identification bytes are needed to choose the header layout.
     * A file too short to hold them cannot be an ELF file either.
     */
    if (fread(ident, sizeof ident, 1, fp) != 1 ||
        memcmp(ident, ELFMAG, SELFMAG) != 0) {
        fprintf(stderr, "Not an ELF file\n");
        fclose(fp);
        return 1;
    }

    if (ident[EI_CLASS] == ELFCLASS32) {
        /* Re-read from the start so the whole 32-bit header is filled in. */
        if (fseek(fp, 0L, SEEK_SET) != 0 ||
            fread(&elf32_header, sizeof elf32_header, 1, fp) != 1) {
            fprintf(stderr, "Failed to read ELF32 header\n");
            status = 1;
        } else {
            printf("ELF32 file detected\n");
            elf32(&elf32_header);
        }
    } else if (ident[EI_CLASS] == ELFCLASS64) {
        /* Re-read from the start so the whole 64-bit header is filled in. */
        if (fseek(fp, 0L, SEEK_SET) != 0 ||
            fread(&elf64_header, sizeof elf64_header, 1, fp) != 1) {
            fprintf(stderr, "Failed to read ELF64 header\n");
            status = 1;
        } else {
            printf("ELF64 file detected\n");
            elf64(&elf64_header);
        }
    } else {
        fprintf(stderr, "Unknown ELF file class\n");
        status = 1;
    }

    fclose(fp);
    return status;
}
2. 32bit/64bit ELF 파일 생성 및 테스트

왜 안 될까?

내가 사용 중인 ARM 칩용 리눅스의 문제

해결

2. SIMD 프로그램
실습 1
1. 코드
#include <stdio.h>
#include<stdlib.h>
#include <time.h>
#include <arm_neon.h>
/*
 * Scalar reference kernel: element-wise product of two float arrays.
 *
 * Writes src1[k] * src2[k] to dst[k] for k = 0 .. count-1, in increasing
 * order. Does nothing when count is zero or negative.
 */
void mat_mul_c (float* dst, float* src1, const float* src2, int count) {
    int remaining;

    /* Walk the three arrays in lock step, one element per iteration. */
    for (remaining = count; remaining > 0; --remaining) {
        *dst = *src1 * *src2;
        ++dst;
        ++src1;
        ++src2;
    }
}
/*
 * NEON-intrinsics kernel: element-wise product of two float arrays.
 *
 * Writes src1[k] * src2[k] to dst[k] for k = 0 .. count-1. Full groups of
 * four floats are multiplied with one vector instruction each; the 0..3
 * elements left over are handled one at a time, so count does not have to
 * be a multiple of 4. Does nothing when count is zero or negative.
 */
void mat_mul_neon_c (float* dst, float* src1, const float* src2, int count) {
    /*
     * Loop while a whole vector remains. Testing "count >= 4" rather than
     * "count != 0" keeps the loop from stepping past zero (and past the end
     * of the arrays) when count is not a multiple of 4.
     */
    for (; count >= 4; count -= 4, src1 += 4, src2 += 4, dst += 4) {
        float32x4_t in1 = vld1q_f32(src1);
        float32x4_t in2 = vld1q_f32(src2);
        float32x4_t out = vmulq_f32(in1, in2);
        vst1q_f32(dst, out);
    }

    /* Scalar tail for the remaining 0..3 elements. */
    for (; count > 0; count--, src1++, src2++, dst++) {
        *dst = *src1 * *src2;
    }
}
/*
 * Inline-assembly (AArch64) kernel: element-wise product of two float
 * arrays.
 *
 * Writes src1[k] * src2[k] to dst[k] for k = 0 .. count-1. Full groups of
 * four floats go through the assembly loop; the 0..3 elements left over are
 * handled in C, so count does not have to be a multiple of 4. Does nothing
 * when count is zero or negative.
 */
void mat_mul_neon_asm (float* dst, float* src1, const float* src2, int count) {
    if (count <= 0) {
        return;
    }

    int vec_elems = count & ~3;   /* elements covered by whole 4-float vectors */
    int tail_elems = count & 3;   /* 0..3 elements left for the scalar loop */

    /*
     * The assembly loop tests its counter only after the first iteration,
     * so it must not be entered with nothing to do.
     */
    if (vec_elems > 0) {
        /*
         * Operand notes:
         *  - the post-indexed ld1/st1 advance all three pointers and subs
         *    updates the counter, so every operand is read-write ("+r");
         *  - the counter is a 32-bit int, hence the %w (W register) form;
         *  - subs sets the condition flags, hence the "cc" clobber.
         */
        asm volatile("1: \n"
                     " ld1 {v0.4s}, [%[src1]], #16 \n"
                     " ld1 {v1.4s}, [%[src2]], #16 \n"
                     " fmul v0.4s, v0.4s, v1.4s \n"
                     " subs %w[count], %w[count], #4 \n"
                     " st1 {v0.4s}, [%[dst]], #16 \n"
                     " bgt 1b \n"
                     : [dst] "+r"(dst), [src1] "+r"(src1), [src2] "+r"(src2),
                       [count] "+r"(vec_elems)
                     :
                     : "memory", "cc", "v0", "v1"
        );
    }

    /* dst/src1/src2 now point just past the vectorised part. */
    for (int i = 0; i < tail_elems; i++) {
        dst[i] = src1[i] * src2[i];
    }
}
/*
 * Benchmark driver: time the scalar, NEON-intrinsics and inline-assembly
 * multiply kernels on arrays of argv[1] pseudo-random floats and print the
 * elapsed wall-clock seconds of each.
 *
 * Returns 0 on success, EXIT_FAILURE on bad usage, an invalid size, or an
 * allocation failure.
 */
int main(int argc, char *argv[]) {
    /* A wrong argument count does not set errno, so perror() would print a
     * misleading system message; report the usage instead. */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <array_size>\n", argc > 0 ? argv[0] : "simd");
        return EXIT_FAILURE;
    }

    /* strtol, unlike atoi, lets us reject non-numeric and trailing input. */
    char *end = NULL;
    long requested = strtol(argv[1], &end, 10);
    const long max_elems = (long)(~0u >> 1);   /* largest value that fits in int */
    if (end == argv[1] || *end != '\0' || requested <= 0 || requested > max_elems) {
        fprintf(stderr, "invalid array size: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    int array_size = (int)requested;

    struct timespec begin, end_time;
    double mat_mul_c_time, mat_mul_neon_c_time, mat_mul_neon_asm_time;

    /* Heap buffers: a user-chosen size must not be placed on the stack. */
    float *a = malloc((size_t)array_size * sizeof *a);
    float *b = malloc((size_t)array_size * sizeof *b);
    float *c = malloc((size_t)array_size * sizeof *c);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "out of memory for %d elements\n", array_size);
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    /* Fixed seed so every run multiplies the same data. */
    srand(0);
    for (int i = 0; i < array_size; i++) {
        a[i] = rand();
        b[i] = rand();
    }

    clock_gettime(CLOCK_MONOTONIC, &begin);
    mat_mul_c(c, a, b, array_size);
    clock_gettime(CLOCK_MONOTONIC, &end_time);
    mat_mul_c_time = (end_time.tv_sec - begin.tv_sec) + (end_time.tv_nsec - begin.tv_nsec) / 1e9;

    clock_gettime(CLOCK_MONOTONIC, &begin);
    mat_mul_neon_c(c, a, b, array_size);
    clock_gettime(CLOCK_MONOTONIC, &end_time);
    mat_mul_neon_c_time = (end_time.tv_sec - begin.tv_sec) + (end_time.tv_nsec - begin.tv_nsec) / 1e9;

    clock_gettime(CLOCK_MONOTONIC, &begin);
    mat_mul_neon_asm(c, a, b, array_size);
    clock_gettime(CLOCK_MONOTONIC, &end_time);
    mat_mul_neon_asm_time = (end_time.tv_sec - begin.tv_sec) + (end_time.tv_nsec - begin.tv_nsec) / 1e9;

    printf("array_size = %d\n", array_size);
    printf("mat_mul_c_time : %lf\n", mat_mul_c_time);
    printf("mat_mul_neon_c_time : %lf\n", mat_mul_neon_c_time);
    printf("mat_mul_neon_asm_time : %lf\n", mat_mul_neon_asm_time);

    free(a);
    free(b);
    free(c);
    return 0;
}
2. 결과




실습 2
1. 코드
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <arm_neon.h>
/*
 * Scalar reference kernel: element-wise product of two float arrays.
 *
 * For each index below count, stores src1[index] * src2[index] into
 * dst[index]. A zero or negative count leaves dst untouched.
 */
void mat_mul_c (float* dst, float* src1, const float* src2, int count) {
    int idx = 0;

    while (idx < count) {
        float lhs = src1[idx];
        float rhs = src2[idx];
        dst[idx] = lhs * rhs;
        ++idx;
    }
}
/*
 * NEON-intrinsics kernel: element-wise product of two float arrays, for any
 * count.
 *
 * Writes src1[k] * src2[k] to dst[k] for k = 0 .. count-1. Full groups of
 * four floats are multiplied directly. A final partial group of 1..3
 * elements is copied into zero-padded 4-float scratch buffers, multiplied
 * as one vector, and only the valid lanes are copied back to dst. Does
 * nothing when count is zero or negative.
 */
void mat_mul_neon_c (float* dst, float* src1, const float* src2, int count) {
    float32x4_t in1, in2, out;

    for (; count >= 4; count -= 4, src1 += 4, src2 += 4, dst += 4) {
        in1 = vld1q_f32(src1);
        in2 = vld1q_f32(src2);
        out = vmulq_f32(in1, in2);
        vst1q_f32(dst, out);
    }

    if (count > 0) {
        /*
         * Zero-initialise the scratch inputs so the unused lanes hold
         * defined values instead of whatever was left on the stack.
         */
        float32_t tmp_src1[4] = {0};
        float32_t tmp_src2[4] = {0};
        float32_t tmp_dst[4];

        memcpy(tmp_src1, src1, count * sizeof(float));
        memcpy(tmp_src2, src2, count * sizeof(float));
        in1 = vld1q_f32(tmp_src1);
        in2 = vld1q_f32(tmp_src2);
        out = vmulq_f32(in1, in2);
        vst1q_f32(tmp_dst, out);
        /* Copy back only the lanes that belong to the caller's array. */
        memcpy(dst, tmp_dst, count * sizeof(float));
    }
}
/*
 * Benchmark driver: time the scalar and NEON-intrinsics multiply kernels on
 * arrays of argv[1] pseudo-random floats and print the elapsed wall-clock
 * seconds of each.
 *
 * Returns 0 on success, EXIT_FAILURE on bad usage, an invalid size, or an
 * allocation failure.
 */
int main(int argc, char *argv[]) {
    /* A wrong argument count does not set errno, so perror() would print a
     * misleading system message; report the usage instead. */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <array_size>\n", argc > 0 ? argv[0] : "simd");
        return EXIT_FAILURE;
    }

    /* strtol, unlike atoi, lets us reject non-numeric and trailing input. */
    char *parse_end = NULL;
    long requested = strtol(argv[1], &parse_end, 10);
    const long max_elems = (long)(~0u >> 1);   /* largest value that fits in int */
    if (parse_end == argv[1] || *parse_end != '\0' ||
        requested <= 0 || requested > max_elems) {
        fprintf(stderr, "invalid array size: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    int array_size = (int)requested;

    struct timespec begin, end;
    double mat_mul_c_time, mat_mul_neon_c_time;

    float *a = malloc((size_t)array_size * sizeof *a);
    float *b = malloc((size_t)array_size * sizeof *b);
    float *c = malloc((size_t)array_size * sizeof *c);
    /* A large request can fail; do not run the kernels on NULL buffers. */
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "out of memory for %d elements\n", array_size);
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    /* Fixed seed so every run multiplies the same data. */
    srand(0);
    for (int i = 0; i < array_size; i++) {
        a[i] = rand();
        b[i] = rand();
    }

    clock_gettime(CLOCK_MONOTONIC, &begin);
    mat_mul_c(c, a, b, array_size);
    clock_gettime(CLOCK_MONOTONIC, &end);
    mat_mul_c_time = (end.tv_sec - begin.tv_sec) + (end.tv_nsec - begin.tv_nsec) / 1e9;

    clock_gettime(CLOCK_MONOTONIC, &begin);
    mat_mul_neon_c(c, a, b, array_size);
    clock_gettime(CLOCK_MONOTONIC, &end);
    mat_mul_neon_c_time = (end.tv_sec - begin.tv_sec) + (end.tv_nsec - begin.tv_nsec) / 1e9;

    printf("array_size = %d\n", array_size);
    printf("mat_mul_c_time : %lf\n", mat_mul_c_time);
    printf("mat_mul_neon_c_time : %lf\n", mat_mul_neon_c_time);

    free(a);
    free(b);
    free(c);
    return 0;
}
2. 결과
