Location>code7788 >text

《Programming from the Ground Up》阅读笔记:p88-p94

Popularity:470 ℃/2024-07-27 14:41:10

Programming from the Ground Up study day 5, p88-p94 summary totaling 7 pages.

I. Technical summary

#PURPOSE: This program converts an input file 
#to an output file with all letters 
#converted to uppercase.

#PROCESSING:
#(1)Open the input file
#(2)Open the output file
#(3)While we're not at the end of the input file
#  (a)read part of file into our memory buffer
#  (b)go throught each byte of memory, if the byte is a lower-case letter, convert it to uppercase
#  (c)write the memory buffer to output file

 .section .data

#######CONSTANTS#######

 #system call numbers
 .equ SYS_OPEN, 5
 .equ SYS_WRITE, 4
 .equ SYS_READ, 3
 .equ SYS_CLOSE, 6
 .equ SYS_EXIT, 1
 
 #options for open (look at
 #/usr/incude/asm/ for
 #various values. You can combine them
 #by adding them or ORing them)
 #This is discussed at greater length
 #in "Counting Like a Computer"
 .equ O_RDONLY, 0
 .equ O_CREAT_WRONLY_TRUNC, 03101
 
 #standard file descriptors
 .equ STDIN, 0
 .equ STDOUT, 1
 .equ STDERR, 2
 
 #system call interrupt
 .equ LINUX_SYSCALL, 0X80
 
 .equ END_OF_FILE, 0 #This is the return value 
                     #of read which means we've
 					#hit the end of the file
 
 .equ NUMBER_ARGUMENTS, 2

.section .bss
 #Buffer - This is where the data is loaded into 
 #from the data file and written from 
 #into the output file. This should 
 #never exceed 16000 for various
 #reasons.
 
 .equ BUFFER_SIZE, 500
 .lcomm BUFFER_DATA, BUFFER_SIZE
 
 .section .text
 
 #STACK POSITIONS
 .equ ST_SIZE_RESERVE, 8
 .equ ST_FD_IN, -4
 .equ ST_FD_OUT, -8
 .equ ST_ARGC, 0 #Number of arguments
 .equ ST_ARGV_0, 4 #Name of program
 .equ ST_ARGV_1, 8 #Input file name
 .equ ST_ARGV_2, 12 #Output file name

 .globl _start
_start:
 ###INITIALIZE PROGRAM###
 #save the stack pointer

 mov %rsp, %rbp
 
 #Allocate space for our file descriptors
 #on the stack
 sub $ST_SIZE_RESERVE, %rsp

open_files:
open_fd_in:
 ###OPEN INPUT FILE###
 #open syscall
 mov $SYS_OPEN, %rax
 #input filename into %rbx
 mov ST_ARGV_1(%rbp), %rbx
 #read-only flag
 mov $O_RDONLY, %rcx
 #this doesn’t really matter for reading
 mov $0666, %rdx
 #call Linux
 int $LINUX_SYSCALL

store_fd_in:
 #save the given file descriptor
 mov %rax, ST_FD_IN(%rbp)
 
open_fd_out:
 ###OPEN OUTPUT FILE###
 #open the file
 mov $SYS_OPEN, %rax
 #output filename into %rbx
 mov ST_ARGV_2(%rbp), %rbx
 #flags for writing to the file
 mov $O_CREAT_WRONLY_TRUNC, %rcx
 #mode for new file (if it’s created)
 mov $0666, %rdx
 #call Linux
 int $LINUX_SYSCALL
 
store_fd_out:
 #store the file descriptor here
 mov %rax, ST_FD_OUT(%rbp)
 
 ###BEGIN MAIN LOOP###
read_loop_begin:

 ###READ IN A BLOCK FROM THE INPUT FILE###
 mov $SYS_READ, %rax
 #get the input file descriptor
 mov ST_FD_IN(%rbp), %rbx
 #the location to read into
 mov $BUFFER_DATA, %rcx
 #the size of the buffer
 mov $BUFFER_SIZE, %rdx
 #Size of buffer read is returned in %rax
 int $LINUX_SYSCALL
 
 ###EXIT IF WE’VE REACHED THE END###
 #check for end of file marker
 cmp $END_OF_FILE, %rax
 #if found or on error, go to the end
 jle end_loop

continue_read_loop:
 ###CONVERT THE BLOCK TO UPPER CASE###
 push $BUFFER_DATA #location of buffer
 push %rax #size of the buffer
 call convert_to_upper
 pop %rax #get the size back
 add $4, %rsp #restore %rsp
 
 ###WRITE THE BLOCK OUT TO THE OUTPUT FILE###
 #size of the buffer
 mov %rax, %rdx
 mov $SYS_WRITE, %rax
 #file to use
 mov ST_FD_OUT(%rbp), %rbx
 #location of the buffer
 mov $BUFFER_DATA, %rcx
 int $LINUX_SYSCALL
 
 ###CONTINUE THE LOOP###
 jmp read_loop_begin

end_loop:
 ###CLOSE THE FILES###
 ##NOTE - we don’t need to do error checking on these, because error conditions
 #don’t signify anything special here
 mov $SYS_CLOSE, %rax
 mov ST_FD_OUT(%rbp), %rbx
 int $LINUX_SYSCALL
 mov $SYS_CLOSE, %rax
 mov ST_FD_IN(%rbp), %rbx
 int $LINUX_SYSCALL
 ###EXIT###
 mov $SYS_EXIT, %rax
 mov $0, %rbx
 int $LINUX_SYSCALL
 #PURPOSE: This function actually does the
 # conversion to upper case for a block
 #
 #INPUT:The first parameter is the location

 #of the block of memory to convert
 #The second parameter is the length of
 #that buffer
 #
 #OUTPUT: This function overwrites the current
 #buffer with the upper-casified version.
 #
 ##VARIABLES: %rax - beginning of buffer
 #%rbx - length of buffer
 #%rdi - current buffer offset
 #%cl - current byte being examined
 #(first part of %rcx)
 #


 ###CONSTANTS##
 #The lower boundary of our search
 .equ LOWERCASE_A, 'a'
 #The upper boundary of our search
 .equ LOWERCASE_Z, 'z'
 #Conversion between upper and lower case
 .equ UPPER_CONVERSION, 'A' - 'a'
 
 
 ###STACK STUFF###
 .equ ST_BUFFER_LEN, 8 #Length of buffer
 .equ ST_BUFFER, 12 #actual buffer
 
convert_to_upper:
 push %rbp
 mov %rsp, %rbp
 ###SET UP VARIABLES###
 mov ST_BUFFER(%rbp), %rax
 mov ST_BUFFER_LEN(%rbp), %rbx
 mov $0, %rdi

 #if a buffer with zero length was given
 #to us, just leave
 cmp $0, %rbx
 je end_convert_loop

convert_loop:
 #get the current byte
 movb (%rax,%rdi,1), %cl

 #go to the next byte unless it is between
 #’a’ and ’z’
 cmpb $LOWERCASE_A, %cl
 jl next_byte
 cmpb $LOWERCASE_Z, %cl
 jg next_byte

 #otherwise convert the byte to uppercase
 addb $UPPER_CONVERSION, %cl
 #and store it back
 movb %cl, (%rax,%rdi,1)

next_byte:
 inc %rdi #next byte
 cmp %rdi, %rbx #continue unless
                #we’ve reached the
                #end
				
 jne convert_loop

end_convert_loop:
 #no return value, just leave
 mov %rbp, %rsp
 pop %rbp
 ret

Knocked on the code according to the book, but no result, and no error report, heart tired, first.

2. Role of different registers on a 64-bit computer

Registers on computers with different numbers of bits:

r8 = AL AH BL BH CL CH DL DH

r16 = AX BX CX DX BP SP SI DI

r32 = EAX EBX ECX EDX EBP ESP ESI EDI

r64 = RAX RBX RCX RDX RBP RSP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15

Personally, I think that the author should actually list the role of each register first, so that the reader can know why the register is used in the code, or else he/she won't know which register to use when writing it himself/herself. The roles of the registers can be searched for in the System V Application Binary Interface AMD64 Architecture Processor Supplement manual (eg:/elf/x86_64-abi-0.), which is described in "Figure 3.4: Register Usage" in the manual. It is listed here for easy reference:

Register Usage callee saved
%rax temporary register; with variable arguments passes information
about the number of vector registers used; 1st return register
No
%rbx callee-saved register Yes
%rcx used to pass 4th integer argument to functions No
%rdx used to pass 3rd argument to functions; 2nd return register No
%rsp stack pointer Yes
%rbp callee-saved register; optionally used as frame pointer Yes
%rsi used to pass 2nd argument to functions No
%rdi used to pass 1st argument to functions No
%r8 used to pass 5th argument to functions No
%r9 used to pass 6th argument to functions No
%r10 temporary register, used for passing a function’s static chain
pointer
No
%r11 temporary register No
%r12-r14 callee-saved registers Yes
%r15 callee-saved register; optionally used as GOT base pointer Yes
%xmm0–%xmm1 used to pass and return floating point arguments No
%xmm2–%xmm7 used to pass floating point arguments No
%xmm8–%xmm15 temporary registers No
%tmm0–%tmm7 temporary registers No
%mm0–%mm7 temporary registers No
%k0–%k7 temporary registers No
%st0,%st1 temporary registers, used to return long double arguments No
%st2–%st7 temporary registers No
%fs thread pointer Yes
mxcsr SSE2 control and status word partial
x87 SW x87 status word No
x87 CW x87 control word Yes
tilecfig Tile control register No

II. English summary

None.

III. Other

Seek first completion, then perfection, and see a little each day.

IV. References

1. Programming

(1)Jonathan Bartlett,《Programming From The Ground Up》:/subject/1787855/

2. English

(1)Etymology Dictionary:

(2) Cambridge Dictionary:

Welcome to search and follow: programmers (a_codists)