1、再读linux内核存储管理系统完整打印版再读内核存储管理(1):相关的全局变量1.1 相关的全局变量1.1.1 _ramstart, _ramend, _rambase这三个全局变量的定义在head.s文件中:/* Set up the usable of RAM stuff. Size of RAM is determined then* an initial stack set up at the end.*/.align 4_rambase:.long 0_ramstart:.long 0_ramend:.long 0为了在C文件中使用它们,在include/asm/bfin-globa
2、l.h中做了一个声明:extern unsigned long _ramstart, _ramend, _rambase;然后在setup_arch函数中对它们进行了初始化: _ramend = CONFIG_MEM_SIZE * 1024 * 1024; _ramstart = (unsigned long)_bss_stop; _rambase = (unsigned long)_stext;在这里_bss_stop是在ldf文件中定义: bsz_sdram ZERO_INIT INPUT_SECTION_ALIGN(4) . = (. + 3) / 4 * 4; _bss_stop =
3、.; _end = .; MEM_SDRAM而bsz_sdram是内核中的最后一个段,因此_ramstart将指向内核之后的第一个可用字节。_stext也是在ldf文件中定义的一个变量: .text INPUT_SECTION_ALIGN(4) . = (. + 3) / 4 * 4; _text = .; _text = .; _stext = .; . INPUT_SECTION_ALIGN(4) . = (. + 3) / 4 * 4; _etext = .; MEM_SDRAM_TEXT而.text是内核中排在最前面的一个段,因此_rambase将指向内核的起始位置。在这里还需要注意_
4、ramend的定义,虽然在这里给它赋了一个固定的值,但是通过引导程序设定启动参数可以进行更改,如引导程序使用mem=32m的时候,将会在parse_cmdline_early函数中触发下面的代码: if (!memcmp(to, "mem=", 4)) { to += 4; memsize = memparse(to, &to); if (memsize) _ramend = memsize; } else if (!memcmp(to, "max_mem=", 8)) { ... } 从而将这个值更改。1.1.2 memory_start和memory_end这两个全局变量几乎都是同时使用的,它们的定义在arch/black
5、fin/kernel/setup.c中:unsigned long memory_start, memory_end, physical_mem_end;在setup_arch函数中对它们做了初始化工作: memory_start = PAGE_ALIGN(_ramstart);/* to align the pointer to the (next) page boundary */#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) 即memory_start指向内核结束的位置,但是向上做了页对齐(4K)。 /* by now the
6、 stack is part of the init task */ memory_end = _ramend - DMA_UNCACHED_REGION;#if defined(CONFIG_DMA_UNCACHED_2M)# define DMA_UNCACHED_REGION (2 * 1024 * 1024)#elif defined(CONFIG_DMA_UNCACHED_1M)# define DMA_UNCACHED_REGION (1024 * 1024)#else# define DMA_UNCACHED_REGION (0)#endif即memory_end指向可用物理内存
7、的最高位置。当没有启用MTD的时候,还有下面一段代码:#if (defined(CONFIG_BFIN_ICACHE) && ANOMALY_05000263) /* Due to a Hardware Anomaly we need to limit the size of usable * instruction memory to max 60MB, 56 if HUNT_FOR_ZERO is on * 05000263 - Hardware loop corrupted when taking an ICPLB exception */#if (defined(CONFIG_DEBUG
8、_HUNT_FOR_ZERO)) if (memory_end >= 56 * 1024 * 1024) memory_end = 56 * 1024 * 1024;#else if (memory_end >= 60 * 1024 * 1024) memory_end = 60 * 1024 * 1024;#endif /* CONFIG_DEBUG_HUNT_FOR_ZERO */ printk(KERN_NOTICE "Warning: limiting memory to %liMB due to hardware anomaly 05000263\n", memory_end >> 20);#endi
9、f /* ANOMALY_05000263 */此时,由于ANOMALY_05000263的缘故,memory_end将被限制为60M,即0x03c0 0000。#if !defined(CONFIG_MTD_UCLINUX) memory_end -= SIZE_4K; /*In case there is no valid CPLB behind memory_end make sure we don't get to close*/#endif由此,memory_end的值变为0x03bf f000。且不再改变。当启用了MTD的时候,memory_end将指向物理内存减去RAMDISK大小的
10、位置。1.1.3 nr_kernel_pages与nr_all_pages这两个值的定义都在mm/page_alloc.c中:unsigned long __meminitdata nr_kernel_pages;unsigned long __meminitdata nr_all_pages;在free_area_init_core这个初始化函数中对它们赋初值:static void __meminit free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_si
11、ze) { for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; /* size = realsize = SDRAM的页表数量,对64M SDRAM,其值为0x3fff */ size = zone_spanned_pages_in_node(nid, j, zones_size); realsize = size - zone_absent_pages_in_node(nid, j, zholes_size); /* * Adjust realsize so that it accounts for how muc
12、h memory * is used by this zone for memmap. This affects the watermark * and per-cpu initialisations */ memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT; if (realsize >= memmap_pages) { realsize -= memmap_pages; printk(KERN_DEBUG "%s zone: %lu pages used for memmap\n", zone_names[j], memmap_pages); } els
13、e printk(KERN_WARNING "%s zone: %lu pages exceeds realsize %lu\n", zone_names[j], memmap_pages, realsize); /* Account for reserved pages */ /* dma_reserve的值可以从引导程序导入,在此为0 */ if (j == 0 && realsize > dma_reserve) { realsize -= dma_reserve; printk(KERN_DEBUG "%s zone: %lu pages reserved\n", zone_names[0], dma_reserve); } /*
14、 is_highmem_idx恒为0 */ if (!is_highmem_idx(j)) nr_kernel_pages += realsize; nr_all_pages += realsize; ... } } 从上述代码可以看出,这两个值都表示可用的页的数量,其表示的内存范围从0到60M,不包含page数组所占用的页。对于64M SDRAM(实际限制为60M),不启用MTD的情况,其值为0x3b6a。1.1.4 mem_map这个全局变量的定义在mm/nommu.c中:struct page *mem_map;在内核中每个4K的页都有一个struct page与之相对应,而mem_map就是指向这个page数
15、组的头指针。它的初始化由alloc_node_mem_map完成:static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) { /* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { unsigned long size, start, end; struct page *map; /* * The zone's endpoints aren't required to be MAX_ORDER
16、 * aligned but the node_mem_map endpoints must be in order * for the buddy allocator to function correctly. */ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); end = pgdat->node_start_pfn + pgdat->node_spanned_pages; end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct pa
17、ge); map = alloc_remap(pgdat->node_id, size); if (!map) map = alloc_bootmem_node(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } /* * With no DISCONTIG, the global mem_map is just set as node 0's */ if (pgdat == NODE_DATA(0)) mem_map = NODE_DATA(0)->node_mem_map; } 从这个函数可以看出,它的值与pg
18、list_data中的node_mem_map成员相同。在这里pgdat指向全局唯一的pglist_data:extern struct pglist_data contig_page_data;pgdat->node_start_pfn的值为0。pgdat->node_spanned_pages的值为整个SDRAM中的页(4K)数量。1.1.5 contig_page_data内核支持所谓的NUMA结构,它将整个系统的存储空间分成几个不连续的节点,每个节点用一个pglist_data进行描述,再将这些节点放在一个链表中,但在BF561系统内核中定义了一个叫NODE_DATA的宏,它的定义在 in
19、clude/linux/mmzone.h中:extern struct pglist_data contig_page_data;#define NODE_DATA(nid) (&contig_page_data)从这个定义可以看出,在内核中实际只有一个pglist_data,即contig_page_data。1.1.6 vm_total_pages这个值的定义在mm/vmscan.c中:long vm_total_pages; /* The total number of pages which the VM controls */它表示内存的可用页数,其初始化由void __meminit
20、 build_all_zonelists(void) { vm_total_pages = nr_free_pagecache_pages(); printk("Built %i zonelists. Total pages: %ld\n", num_online_nodes(), vm_total_pages); } 函数完成,对于64M内存(实际限制为60M),其值将为0x3b6a。再读内核存储管理(2):相关的数据结构1.1 相关的数据结构1.1.1 pglist_data pglist_data的定义在include/linux/mmzone.h中:/* The pg_data_t structure i
21、s used in machines with CONFIG_DISCONTIGMEM* (mostly NUMA machines?) to denote a higher-level memory zone than the* zone denotes.* On NUMA machines, each NUMA node would have a pg_data_t to describe* its memory layout.* Memory statistics and page replacement data structures are maintained on a* per-
22、zone basis.*/struct bootmem_data;typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; struct page *node_mem_map; struct bootmem_data *bdata; unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical
23、 pages */ unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; wait_queue_head_t kswapd_wait; struct task_struct *kswapd; int kswapd_max_order; } pg_data_t;这个结构体用于描述可用存储空间的情况。● bdata static bootmem_data_t contig_bootmem_data;struct pglist_data contig_p
24、age_data = { .bdata = &contig_bootmem_data };从这个定义还可以看出在这个结构体中,bdata实际将指向一个固定的位置contig_bootmem_data且在mem_init函数调用后此成员将不再使用。● zone 对于这个结构体中的zone,内核实际只使用了ZONE_DMA(0)这个区域,它的范围从内核代码结束一直到物理内存结束。● node_id 因为整个内核只使用了一个NODE,因此在这个结构体中node_id的值将为0。● node_start_pfn 将为0。● node_spanned_pages和node_present_pages两个成员的初始化
25、在calculate_node_totalpages函数中完成,它们的值为SDRAM的页表数量,包含未用的区域和内核代码等,其值相等。对于64M内存而言(实际限制到60M),其值为0x3bff。● node_mem_map 在内核中每个4K的页都有一个struct page结构体与之对应,这个成员指向这个page数组的首地址,它将在初始化时由alloc_node_mem_map函数进行空间分配(使用bootmem)。● nr_zones 这个值用于表示可用的zone的最高序号+1。对于BF561而言,只使用了ZONE_DMA,因此这个值将为1。1.1.2 per_cpu_pageset 这个结构体的
26、定义在include/linux/mmzone.h中:enum zone_stat_item /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, NR_INACTIVE, NR_ACTIVE, NR_ANON_PAGES, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, /* Second 128 byte cacheline */
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1