Chapter 9

Strings and Arrays

Modified

Overview

Arrays are groups of variables accessible individually by an index. C++ arrays are implemented almost exactly as the machine architecture lays them out, so most of the notions of array definition and of access to individual variables will be familiar. Additionally, the Intel processor implements a number of instructions specifically designed to make common array operations easier and more efficient. Both C++ and the assembly language for Intel processors allow array variables to be accessed either by an index or by a pointer.

Defining Arrays

C++ defines arrays starting at index 0, which corresponds to the starting offset of the array. In the following, array s begins at offset 1230 and array x begins at offset 1240. For the character array, because each variable is one byte, the offset of s[2] is 1232, or the offset s + 2 (i.e. 1230+2 = 1232). For the integer array, because each variable is four bytes, the offset of the C++ array x[2] is 1248, or the offset x + 8 (i.e. 1240+8 = 1248).

The array s, defined as:

char s[5] = {'A', 'B', 'C', 'D', 'E'};

s    db    'A', 'B', 'C', 'D', 'E'

is represented in memory by: 

 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h

The array x, defined as:

int x[4] = {45, 67, -23, 58};

x    dd    45, 67, -23, 58

is represented in memory by: 

 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch

 

Indexed Array Access

In C++ array variables are accessed by an index, which may be a constant or a suitable variable. In C++ the index must be an ordinal value, integer, character, etc. In assembler, an index register, such as eSi, eDi, or eBx is used. Array variables may also be accessed by a pointer, more on that later. The correspondence between C++ and assembly is very close, for example the following array accesses are equivalent. One point of note, whenever a double word array variable is accessed, because each variable occupies four bytes, the variable offset is computed by multiplying the C++ index by 4. The definitions of s and x from above are used in the following example.

 

 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h

 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch
C++ and Assembly Indexed Array Access
C++  Assembler
char s[5] = {'A','B','C','D','E'};
int    x[4] = {45, 67, -23, 58};
char chr;
int i;
int z;
s   db   'A','B','C','D','E'     ; byte array: displacement equals index
x   dd   45, 67, -23, 58         ; double word array: displacement = index * 4
chr db   ?                       ; not named ch, because CH is a register name
i   dd   ?
z   dd   ?
s[3] = 'D'; Mov  s[3], 'D'
char chr;
chr=s[2];
Mov  Al, s[2]                    ; copy through Al: Mov cannot take two memory operands
Mov  chr, Al
x[2]=763; Mov  x[8], 763         ; displacement 8 = index 2 * 4 bytes
int z;
z=x[1];
Mov  eAx, x[4]                   ; displacement 4 = index 1 * 4 bytes
Mov  z, eAx
int i = 3;
s[i] = 'D';
Mov  eSi, 3                      ; eSi holds the index i
Mov  s[eSi], 'D'
int i = 2;
x[i] = 763;
Mov  eSi, 8                      ; eSi = i * 4 = 2 * 4
Mov  x[eSi], 763

Indexing Examples

C++ and Assembler Upper to Lower Case Conversion
C++ Assembler
// Lower-case every letter of s: OR-ing in 0x20 sets bit 5,
// which is the only bit that differs between 'A' and 'a' in ASCII.
void main(void) {
    char s[5] = {'A', 'B', 'C', 'D', 'E'};
    int i;

    for (i = 0; i < 5; i++)
        s[i] |= 0x20;
}
 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h
.data
    s    db     'A', 'B', 'C', 'D', 'E'   ; five upper-case letters

.code
; Walk s by index and set bit 5 (20h) of every byte, turning each
; upper-case letter into its lower-case form.
main    proc  near 
        Mov   eSi, 0            ; eSi = index i, starting at 0
 for:   Cmp   eSi, 4            ; 4 is the last valid index
        Ja    endfor            ; leave the loop once i > 4
        Or    s[eSi], 20h       ; s[i] = s[i] | 20h
        Inc   eSi               ; i++
        Jmp   for
 endfor:
	invoke ExitProcess, 0
main    Endp
        End   main

 

C++ and Assembler Summing an Integer Array
C++ Assembler
// Add up the four elements of x into Sum

void main(void) {
    int  x[4] = { 45, 67, -23, 58};
    int  Sum = 0;
    int  i;

    for (i = 0; i < 4; i++)
        Sum += x[i];
}

 

 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch
.data
  Sum   dd     ?                 ; receives the total
  x     dd     45, 67, -23, 58   ; four double words, 4 bytes apart

.code
; Sum the four double words of x into Sum, indexing x with eSi.
main    proc  near 
        Mov   eAx, 0            ; eAx = running sum
        Mov   eSi, 0            ; eSi = displacement into x (index * 4)
 for:   Cmp   eSi, 3*4          ; 12 is the displacement of the last element, x[3]
        Jbe   do
        Jmp   endfor
  do:   Add   eAx, x[eSi]       ; 32-bit add: x holds double words and Sum is stored from eAx
        Add   eSi, 4            ; next element is 4 bytes further on
        Jmp   for
 endfor:
        Mov   Sum, eAx
	  invoke ExitProcess, 0
main    Endp
        End   main

Indexing Array Parameters

Arrays in C++ are passed to a function using call by reference, only the starting offset of the array is passed. The array can be indexed in Assembler using two registers, one to hold the starting offset, the other to hold the index. In the following example, eBx holds the starting offset and eSi holds the index. Assuming eBx=1240 and eSi=4, the instruction:

Mov    eAx, [eBx][eSi]
would access the double word at offset 1244, the effective address of the instruction determined by eBx+eSi = 1240+4. By incrementing or decrementing the index value in eSi (or offset in eBx) by the element size, 4 for a double word array, each array entry can be accessed.

 

C++ and Assembler Function Summing an Integer Array
C++ Assembler
// Sum array x using function Sumf

int Sumf(int a[]) { // a receives only the starting offset of the array
    int result = 0;
    int i;

    for (i = 0; i < 4; i++)     // this example always sums four elements
        result += a[i];

    return result;
}

void main(void) {
    int Sum;
    int  x[4] = { 45, 67, -23, 58};

    Sum = Sumf( x );   
}   

   _______________  
  | Offset X 1240 |  eBp+8
  |_______________|
  |  Ret Address  |  eBp+4
  |_______________|
  |  Old eBp      |  eBp+0
  |_______________|
 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch

 

.data
  Sum   dd     ?
  x     dd     45, 67, -23, 58

.code
main    proc  near  
        Push  offset x        ; pass the starting offset of x (call by reference)
        Call  Sumf
        Add   eSp, 4          ; caller removes the one double word argument
        Mov   Sum, eAx        ; Sumf returns the total in eAx

	invoke ExitProcess, 0
main    Endp

; int Sumf(int a[]) - sum of the four double words starting at a
; In:   [eBp+8] = starting offset of the array, pushed by the caller
; Out:  eAx = sum
; eBx and eSi are saved and restored; the pushes come after the frame
; is set up, so the argument is still found at eBp+8.
Sumf    proc  Near
        Push  eBp
        Mov   eBp, eSp
        Push  eBx
        Push  eSi

        Mov   eAx, 0
        Mov   eSi, 0
        Mov   eBx, [eBp+8]   ; eBx = Offset 1240
 for:   Cmp   eSi, 3*4       ; 12 is the displacement of the last element
        Jbe   do
        Jmp   endfor
  do:   Add   eAx, [eBx][eSi] ; Offset 1240 + eSi
        Add   eSi, 4          ; Next index
        Jmp   for
 endfor:
        Pop   eSi
        Pop   eBx
        Pop   eBp
        Ret
Sumf    Endp
        End   main

Pointer Array Access

C++ and Assembler both provide pointer access to arrays which is often more convenient for parameter passing, since arrays are passed by reference.
 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h

 

 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch
C++ and Assembly Pointer Array Access
C++ Assembler
char s[5] = {'A','B','C','D','E'};
int    x[4] = {45, 67, -23, 58};
s   db    'A','B','C','D','E'
x   dd   45, 67, -23, 58
char *ps;
ps = &s[3];
*ps = 'Z';
Mov  eSi, offset s[3]            ; eSi = address of s[3]
Mov  byte ptr [eSi], 'Z'         ; [eSi] has no type, so the size must be stated
char *ps;
ps = s;
*(ps+3) = 'Z';
Mov  eSi, offset s
Mov  byte ptr [eSi+3], 'Z'
int *px;
px = &x[2];
*px=763;
Mov  eSi, offset x[8]            ; displacement 8 = index 2 * 4 bytes
Mov  dword ptr [eSi], 763
int *px;
px = x;
*(px+2) = 763;  // Same as above
Mov  eSi, offset x
Mov  dword ptr [eSi+8], 763      ; px+2 in C++ is 8 bytes past x

C++ and Assembler Upper to Lower Case Conversion
C++ Assembler
void main(void) {
    char s[5] = {'A', 'B', 'C', 'D', 'E'};
    char *ps = s;               // start at the first character

    for (; ps <= s+4; ps++)     // s+4 addresses the last character
        *ps |= 0x20;
}
 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h
.data
    s    db     'A', 'B', 'C', 'D', 'E'

.code    
; Walk s with a pointer in eSi and set bit 5 (20h) of every byte.
main    proc  near
        Mov   eSi, offset s   ;offset 1230
 for:   Cmp   eSi, offset s+4 ;offset 1234
        Jbe   do
        Jmp   endfor
  do:   Or    byte ptr [eSi], 20h ; [eSi] has no type, so the byte size is stated
        Inc   eSi
        Jmp   for
 endfor:
	invoke ExitProcess, 0
main    Endp
        End   main

C++ and Assembler Summing an Array
C++ Assembler
void main(void) {
    int Sum;
    int  x[4] = { 45, 67, -23, 58};
    int *px;

    Sum = 0;
    for(px=x; px<=x+3; px++)    // x+3 points at the last element, x[3]
        Sum = Sum + *px;
}   

 

 Index  0 1 2 3
    x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch
.data
  Sum   dd     ?
  x     dd     45, 67, -23, 58

.code
; Sum x by walking a pointer (eSi) from the first element to the last.
main    proc  near 
        Mov   eAx, 0                     ; running sum
        Mov   eSi, offset x              ; px = x          (1240)
 for:   Cmp   eSi, offset x + 3*4        ; compare with the last element (124C)
        Ja    endfor                     ; done once px is past it
        Add   eAx, [eSi]                 ; sum += *px
        Add   eSi, 4                     ; px++ moves one double word on
        Jmp   for
 endfor:
        Mov   Sum, eAx
	invoke ExitProcess, 0
main    Endp
        End   main

Indexing Array Parameters using Pointers

Arrays in C++ are passed to a function using call by reference, only the starting offset of the array is passed. The array can be indexed in Assembler using one register to hold the offset. In the following example, eSi holds the offset. Assuming eSi=1244, the instruction:

Mov    eAx, [eSi]                ; eAx = 67
would access offset 1244, the effective address of the instruction. By incrementing or decrementing the offset value in eSi each array entry can be accessed.
 
C++ and Assembler Function Summing an Array
C++ Assembler
int Sumf(int a[]) {
    int *pa, result;

    result = 0;
    for(pa=a; pa<=a+3; pa++)    // a+3 points at the last of the four elements
        result = result + *pa;
    return result;
}

void main(void) {
    int Sum;
    int  x[4]={45, 67, -23, 58};

    Sum = Sumf( x );
} 

   _______________  
  | Offset X 1240 |  eBp+8
  |_______________|
  |  Ret Address  |  eBp+4
  |_______________|
  |  Old eBp      |  eBp+0
  |_______________|
 
 Index  0 1 2 3
   x  45 67 -23 58 
Displacement  0 4 8 12
Offset  1240h 1244h 1248h 124Ch
.data
  Sum   dd     ?
  x     dd     45,67,-23,58 ;; Assume x starts at offset 1240 

.code    
main    proc  near  
        Push  offset x      ; Offset 1240
        Call  Sumf
        Add   eSp, 4        ; caller removes the one double word argument
        Mov   Sum, eAx      ; Sumf returns the total in eAx

	invoke ExitProcess, 0
main    Endp

; int Sumf(int a[]) - sum of the four double words starting at a
; In:   [eBp+8] = starting offset of the array, pushed by the caller
; Out:  eAx = sum
; eSi is saved and restored; the push comes after the frame is set up,
; so the argument is still found at eBp+8. eCx is used as scratch.
Sumf    proc  Near
        Push  eBp
        Mov   eBp, eSp
        Push  eSi

        Mov   eAx, 0
        Mov   eSi, [eBp+8] ; Start of array offset 1240
        Mov   eCx, eSi
        Add   eCx, 3*4     ; End of array offset 124C = 1240+12
 for:   Cmp   eSi, eCx
        Jbe   do
        Jmp   endfor
  do:   Add   eAx, [eSi]
        Add   eSi, 4       ; Next index, move eSi pointer
        Jmp   for
 endfor:
        Pop   eSi
        Pop   eBp
        Ret
Sumf    Endp
        End   main

Counting Iteration

It is natural to use iteration on an array, and iteration requires some form of counting. The Loop instruction is provided to make counting iteration simpler. Consider the following equivalent examples, both of which print 5, 4, 3, 2, 1:
Counting Iteration
eCx = 5;
do {
    cout << eCx;
    eCx--;
} while (eCx != 0);
        Mov    eCx, 5        ; eCx is the loop counter
 do:    Mov    eAx, eCx
        Call   PutDec        ; presumably prints eAx and preserves eCx - PutDec is not shown here, confirm
 while: Loop   do            ; eCx = eCx - 1, then branch to do if eCx is not 0
The Loop label instruction implements iteration that executes eCx times by:
  1. Decrementing eCx
  2. Branching to label if eCx is not zero
Conditional Counting Iteration

Variations of the Loop instruction can simultaneously check if flag results are true and eCx is nonzero to continue iteration:

Conditional Counting Iteration
eCx = 5;
do {
    cin >> eAx;
    eCx--;
} while (eCx != 0 && eAx != 100);
        Mov    eCx, 5        ; at most 5 passes
 do:    Call   GetDec        ; presumably reads a number into eAx - GetDec is not shown here, confirm
        Cmp    eAx, 100      ; sets the zero flag when eAx is 100
 while: LoopNe do            ; eCx = eCx - 1, then repeat only if eCx is not 0 and eAx was not 100

String Instructions

Somewhat of a misnomer, string instructions operate on indexed variables, that is, arrays. The string instructions abstract many of the common array operations: copying an array, storing or loading an array entry, scanning an array for a specific value, etc. A string instruction always has a direction through the array, given by the direction flag, and has one or both of a source array, addressed by eSi, and a destination array, addressed by eDi. For example, consider storing one 'A' character to the array s. Using string instructions the following steps are required.
  1. Point eDi to array s - Mov  eDi, offset s.
  2. Set the direction flag to increment - Cld.
  3. Set Al to 'A' - Mov  Al, 'A'
  4. Execute the string store Al instruction - Stosb.
Effect of String Instruction
       Mov    eDi, offset s            ; eDi addresses the destination array
       Mov    Al, 'A'                  ; Al holds the byte to store
       Cld                             ; Direction is increment
       Stosb                          ; Mov [eDi], Al   and    Inc  eDi

The more useful example is to fill the entire character array s with an 'A'. The example below compares C++, indexing, and string instructions. The only difference between the above and below examples is that the Stosb is executed repeatedly, either with the Loop instruction or with the rep prefix.
 

Initialize Character Array using String Instructions
C++ Indexing String Store Byte Repeated String 
Store Byte
char s[5];
int i;

for (i=0; i<=4; i++)      // indexes 0 through 4 cover all five elements
    s[i] = 'A';

 Index  0 1 2 3 4
   s  A B C D E
Displacement  0 1 2 3 4
Offset  1230h  1231h  1232h  1233h  1234h

.data    
    s   db  5 dup(?)            ; five uninitialized bytes

; Fill s with 'A' using eDi as the index.
     Mov     eDi, 0             ; i = 0
for: Cmp     eDi, 4             ; 4 is the last valid index
     Ja      endfor             ; done once i > 4
     Mov     s[eDi], 'A'        ; s[i] = 'A'
     Inc     eDi                ; i++
     Jmp     for
endfor:

.data
   s   db  5 dup(?) 

   Mov    eDi, Offset s        ; eDi addresses the first byte of s
   Cld                         ; direction is increment
   Mov    eCx, 5               ; five bytes to store
   Mov    Al, 'A'              ; the byte stored each time


do:       Stosb                ; Mov [eDi], Al and Inc eDi
while:    Loop   do            ; eCx = eCx - 1, repeat until eCx is 0
.data
 s   db  5 dup(?)

 Mov  eDi, Offset s            ; eDi addresses the first byte of s
 Cld                           ; direction is increment
 Mov  eCx, 5                   ; repeat count used by rep
 Mov  Al, 'A'                  ; the byte stored each time


 rep     Stosb                 ; one instruction replaces the Stosb / Loop pair
 

 
String Instruction List 
Instruction Effect Example

Copy

MovsB
MovsW
MovsD
 
 
 
 
 

 

Moves a byte, word, or double word from the source at eSi to the destination at eDi. eSi and eDi are incremented or decremented by the element size depending upon the direction flag. 

The effect of the MovsD in the following is:

       Cld             ; Direction is increment
       MovsD     ; Effect is (shown as pseudo-instructions):
                         ;     Mov [eDi], [eSi]   - copy one double word
                         ;     Add eDi, 4
                         ;     Add eSi, 4

Copy 20 double words of array src to array dest.
      Mov  eSi, Offset src     ; eSi addresses the source array
      Mov  eDi, Offset dest    ; eDi addresses the destination array
      Mov  eCx, 20             ; number of double words to copy
      Cld                      ; direction is increment

      rep   MovsD              ; repeat MovsD eCx times

Load

LodsB
LodsW
LodsD
 
 
 
 
Loads a byte, word or double word from the source in eSi to Al for bytes, Ax for words, and eAx for double words. eSi is incremented or decremented depending upon the direction flag. 

The effect of the LodsD in the following is:

       Cld             ; Direction is increment
       LodsD       ; Effect is:
                         ;     Mov eAx, [eSi]
                         ;     Add eSi, 4

Print 20 double words of array src.
        Mov  eSi, Offset src   ; eSi addresses the first double word of src
        Mov  eCx, 20           ; number of double words to print
        Cld                    ; direction is increment

do:     LodsD                  ; eAx = next element, eSi moves on by 4
        Call  PutDec           ; presumably prints eAx and preserves eCx and eSi - PutDec is not shown here, confirm
while:  Loop do                ; repeat until eCx is 0

Store

StosB
StosW
StosD
 
 
 
 

 

Stores a byte in Al, word in Ax, or double word in eAx to the destination in eDi. eDi is incremented or decremented depending upon the direction flag. 

The effect of the StosD in the following is:

       Cld             ; Direction is increment
       StosD        ; Effect is:
                         ;     Mov [eDi], eAx   - store one double word
                         ;     Add eDi, 4

Input 20 double words to array dest.
        Mov  eDi, Offset dest  ; eDi addresses the first double word of dest
        Mov  eCx, 20           ; number of double words to read
        Cld                    ; direction is increment

do:     Call GetDec            ; presumably returns the number read in eAx and preserves eCx and eDi - confirm
        StosD                  ; store eAx at [eDi], eDi moves on by 4
while:  Loop do                ; repeat until eCx is 0

Compare
 

 

CmpsB
CmpsW
CmpsD
 
 
 
 
 
 

 

Compare bytes, words or double words at the source array at eSi to the destination array at eDi. The flags are set as though a Cmp was performed. eSi and eDi are incremented or decremented depending upon the direction flag. 

The effect of the CmpsD in the following is:

       Cld             ; Direction is increment
       CmpsD      ; Effect is (shown as pseudo-instructions):
                         ;     Cmp [eSi], [eDi]   - flags only, nothing is stored
                         ;     Add eSi, 4
                         ;     Add eDi, 4

Compare up to 20 double words of array src to dest

ZF=1 if arrays are equal at end of comparison.
      Mov  eSi, Offset src     ; eSi addresses the source array
      Mov  eDi, Offset dest    ; eDi addresses the destination array
      Mov  eCx, 20             ; compare at most 20 double words
      Cld                      ; direction is increment

      repE   CmpsD             ; repeat while elements are equal and eCx is not 0

if:   Je     Then              ; NOTE(review): the label below is spelled "then"; this matches only if the assembler ignores case
      Jmp    Endif             ; Endif is not shown in this fragment
then:     :            ; Arrays are equal
             :

Scan
 

 
 

 

ScasB
ScasW
ScasD
 

 

Scans array at eDi for the byte in Al, word in Ax, or double word in eAx . 
eDi is incremented or decremented depending upon the direction flag.

The effect of the Scasb in the following is:

       Cld                ; Direction is increment
       Mov  Al, 'A'    ; Scan for 'A' in next byte
       ScasB           ; Effect is:
                            ;     Cmp Al, [eDi]   - flags only, Al is unchanged
                            ;     Add eDi, 1

Scan up to 20 bytes of array dest for 'A'. 
ZF=1 on exit if 'A' found, eDi-1 is location of 'A' if ZF=1.
        Mov  eDi, Offset dest  ; eDi addresses the array to scan
        Mov  eCx, 20           ; examine at most 20 bytes
        Cld                    ; direction is increment
        Mov  Al, 'A'           ; the byte to look for

        repnZ Scasb            ; repeat while the byte is not 'A' and eCx is not 0

Repeat Prefixes

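A repeat prefix is shorthand for wrapping a single string instruction in a Loop: the prefixed instruction is repeated, with eCx as the counter, in the same way as the Loop form shown beside it.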

Repeat Prefixes and Loop Comparison 
Repeat Loop
rep   Scasb do:           ; NOTE(review): with Scas the rep prefix is believed to encode as repE - confirm against the instruction reference
    ScasB
while: Loop do            ; repeat until eCx is 0
repZ ScasB
repE ScasB
do:
    ScasB
while: LoopE do           ; repeat while eCx is not 0 and the last compare was equal
repNZ ScasB
repNE ScasB
do:
     ScasB
while: LoopNZ do          ; repeat while eCx is not 0 and the last compare was not equal

Questions

  1. Copy 1000 bytes from string s to string d.
  2. Sum 1000 values of the double word array X.
  3. Fill 1000 values of the double word array X with 0.
  4. Locate the address of the first occurrence of 'Z' in the string s.

String Examples

String Examples
.data
         String	db	"ABCDEFG", 0    ; source text: 7 characters and a terminating 0
         gnirtS	db	"       ",0     ; destination: 7 blanks and a terminating 0

.code 
; Reverse src string of length N to dest string
;   void Reverse(char src[], char dest[], int N);
; In:   src  = offset of the first source byte
;       dest = offset of the destination buffer, at least N bytes long
;       N    = number of bytes to copy; nothing is copied when N is 0
; Out:  dest[0..N-1] holds src[N-1..0]; no terminating 0 is written
; Uses: Al, eCx and the flags. eSi and eDi are saved and restored.
;       When N is not 0 the direction flag is left cleared (increment).
Reverse Proc    Near C, src : near ptr byte, dest : near ptr byte, N : dword
	Push	eSi
	Push	eDi
        Mov     eCx, N       	;;     eCx = N;
        Jecxz   @@done       	;;     if (eCx == 0) return;  Loop would wrap and run 2^32 times
        Mov     eSi, src
        Mov     eDi, dest    	;;     eDi = dest;
        Add     eSi, eCx
        Dec     eSi          	;;     eSi = src + eCx - 1;   the last source byte

  @@do: Std                  	;;     do {                   read backward
        LodsB                	;;         Al = *eSi--;
        Cld                  	;;                            write forward
        Stosb                	;;         *eDi++ = Al;
  @@while:
        Loop    @@do         	;;     } while (--eCx != 0);
  @@done:
	  Pop	    eDi
	  Pop	    eSi
        Ret
Reverse Endp
 
;; Print a 0 terminated string
;;   void putstr( char s[] )
;; Every byte of s before the terminating 0 is loaded into Al and
;; WriteChar is invoked for it. WriteChar is defined elsewhere and is
;; expected to print Al. eSi is saved and restored.
putstr	  Proc    Near C, s : near ptr byte 
	Push	  eSi
	Cld                    ;;    Lodsb moves forward
	Mov     eSi, s         ;;    eSi = s;
@@next:
	Lodsb                  ;;    Al = [eSi++];
	Cmp     Al, 00         ;;    terminating 0 reached?
	Je      @@finish       ;;        yes: stop
	invoke  WriteChar      ;;        no:  cout << Al;
	Jmp     @@next
@@finish:
	Pop	  eSi
	Ret
putstr	  Endp
;; Number of characters in 0 terminated string
;;   int strlen(char s[])
;; In:   s = offset of a 0 terminated string
;; Out:  eAx = number of bytes before the terminating 0
;; Uses: Al/eAx, eCx and the flags. eDi is saved and restored.
;;       Only the first 65535 bytes are examined.
strlen	  Proc    Near C, s : near ptr byte
	Push	  eDi    
	Cld                    ;;   Scan forward
	Mov     Al, 0         	;;   Terminating 0
	Mov     eCx, 0ffffh    ;;   eCx = 65535;
	Mov     eDi, s       	;;   eDi = s;

	repne   Scasb          ;;   Compare Al with [eDi++], eCx--, until a 0 is found or eCx is 0

	Mov	  eAx, 0ffffh
	Sub	  eAx, eCx      	;;   eAx = 65535 - eCx = bytes examined, including the 0
	Dec	  eAx           ;;   Leave the terminating 0 out of the count
	Pop	  eDi	
	Ret                   	;; Return length in eAx
strlen Endp                     
main	Proc	near			;; void main(void) {
					;;  Reverse( String, gnirtS, strlen(String) )
	invoke	strlen, addr String	;;    eAx = strlen( String ), the source length

	invoke	Reverse, addr String, addr gnirtS, eAx
 
	invoke	putstr, addr gnirtS	;;    putstr( gnirtS )
         
	invoke	ExitProcess, 0
main	Endp

	End      main