swingonaspiral swingonaspiral - 2 months ago 28
Linux Question

ROT13 Cipher in Assembly

So I've got my ROT13 cipher pretty much doing what I want, however at the end the command prompt shows up on the same line as the last line of output. This is my first project in Assembly so I'm pretty unsure of what I'm doing wrong.

%define STDIN 0
%define STDOUT 1
%define SYSCALL_EXIT 1
%define SYSCALL_READ 3
%define SYSCALL_WRITE 4
%define BUFLEN 256


SECTION .data ; initialized data section

msg1: db "Enter string: " ; user prompt
len1: equ $-msg1 ; length of first message

msg2: db "Original: " ; original string label
len2: equ $-msg2 ; length of second message

msg3: db "Convert: " ; converted string label
len3: equ $-msg3

msg4: db 10, "Read error", 10 ; error message
len4: equ $-msg4


SECTION .bss ; uninitialized data section
buf: resb BUFLEN ; buffer for read
newstr: resb BUFLEN ; converted string
rlen: resb 4 ; length


SECTION .text ; Code section.
global _start ; let loader see entry point

_start: nop ; Entry point.
start: ; address for gdb

; prompt user for input
;
mov eax, SYSCALL_WRITE ; write function
mov ebx, STDOUT ; Arg1: file descriptor
mov ecx, msg1 ; Arg2: addr of message
mov edx, len1 ; Arg3: length of message
int 080h ; ask kernel to write

; read user input
;
mov eax, SYSCALL_READ ; read function
mov ebx, STDIN ; Arg 1: file descriptor
mov ecx, buf ; Arg 2: address of buffer
mov edx, BUFLEN ; Arg 3: buffer length
int 080h

; error check
;
mov [rlen], eax ; save length of string read
cmp eax, 0 ; check if any chars read
jg read_OK ; >0 chars read = OK
mov eax, SYSCALL_WRITE ; ow print error mesg
mov ebx, STDOUT
mov ecx, msg4
mov edx, len4
int 080h
jmp exit ; skip over rest
read_OK:


; Loop for upper case conversion
; assuming rlen > 0
;
L1_init:
mov ecx, [rlen] ; initialize count
mov esi, buf ; point to start of buffer
mov edi, newstr ; point to start of new string

L1_top:
mov al, [esi] ; get a character
inc esi ; update source pointer
cmp al, ' ' ; Make sure none of these characters is entered
je L1_cont ; Jump if one is present
cmp al, '?'
je L1_cont
cmp al, '1'
je L1_cont
cmp al, '+'
je L1_cont
cmp al, '='
je L1_cont
cmp al, '!'
je L1_cont
cmp al, 'a' ; less than 'a'?
jb L1_uppershiftwrap
cmp al, 'm' ; more than 'z'?
ja L1_lowershift
add al, 13 ; shift 13 spaces

L1_cont:
mov [edi], al ; store char in new string
inc edi ; update dest pointer
dec ecx ; update char count
jnz L1_top ; loop to top if more chars


L1_uppershiftwrap:
cmp ecx, 0
jz L1_end
cmp al, 'M'
jb L1_uppershift
cmp ecx, 0
jz L1_end
sub al, 13
mov [edi], al
inc edi
dec ecx
jnz L1_top

L1_uppershift:
cmp ecx, 0
jz L1_end
add al, 13
mov [edi], al
inc edi
dec ecx
jnz L1_top

L1_lowershift:
cmp ecx, 0
jz L1_end
sub al, 13
mov [edi], al
inc edi
dec ecx
jnz L1_top

L1_end:


; print out user input for feedback
;
mov eax, SYSCALL_WRITE ; write message
mov ebx, STDOUT
mov ecx, msg2
mov edx, len2
int 080h

mov eax, SYSCALL_WRITE ; write user input
mov ebx, STDOUT
mov ecx, buf
mov edx, [rlen]
int 080h

; print out converted string
;
mov eax, SYSCALL_WRITE ; write message
mov ebx, STDOUT
mov ecx, msg3
mov edx, len3
int 080h

mov eax, SYSCALL_WRITE ; write out string
mov ebx, STDOUT
mov ecx, newstr
mov edx, [rlen]
mov dl, 0Dh
int 080h


; final exit
;
exit: mov eax, SYSCALL_EXIT ; exit function
mov ebx, 0 ; exit code, 0=normal
int 080h ; ask kernel to take over

Answer

Your program doesn't print a newline at the end of its output, so the cursor is at the end of a non-empty line when it exits. The shell doesn't know this, and prints the next prompt there.

echo foo includes a trailing newline, so when the shell prints the next prompt, the cursor was already at the start of a new line. echo -n foo doesn't include a trailing newline, so it leaves the cursor at the end of a line that starts with foo, and your prompt is tacked on to that, like your program does. Pipe those echo commands into hd to see a hexdump of the ASCII characters they print.


So the solution is to make sure your output ends with a newline (ASCII code = 10). You already have that in your msg4: db 10, "Read error", 10 string. (It starts with a newline as well as ending with one.) In C you'd write `"\nRead error\n", but NASM syntax doesn't work that way. It does support C-style escapes inside backquoted strings, but it's typical for people to write newlines with numeric constants.


Your user input (that you get from sys_read) should usually end with a newline, unless the user typed 256 characters on a line, or used ctrl-D to make read return early. (Or similarly piped input that doesn't end with a newline, so read hits EOF).

I started to follow the logic of the compares, but it got tiring pretty quickly. I'm not sure what happens to newlines in your input, but I suspect that your code modifies newlines in the buffer. You should probably avoid that, and leave them unchanged. I guess you'd just add them to your list of compares & branch for characters not to modify.

That's probably more useful behaviour for a rot13 program than adding an extra newline to the end of the buffer, or calling sys_write one extra time to print a newline by itself.


You can test the system calls your program makes using strace. e.g. strace ./a.out will decode the read() and write() system calls you make.

For more debugging tips, see the bottom of the tag wiki. (Which has lots of useful stuff besides that).


BTW, you could do all those cmp al, '?' compares in parallel in an xmm register, with SSE2 (broadcast al to every element of an xmm register, and PCMPEQB with a constant / PMOVMSKB / test/jnz). But don't worry about that until you have a good handle on scalar code.