We help IT Professionals succeed at work.

Huffman Compression and decompression code on assembly language

m_hormaty asked
Can you please help me find the Huffman compression and decopression in a file, (Not in C++/C,java, It has to be in ASM). I was try to do the whole program but it is giving a hard time.
I need to review the code beforemy presentation,
Watch Question

Show what you have so far; this is an academic thing, right?  Also those are two fairly substantial programs.  Also, if it's an assignment, you might be expected to do it a certain way, such as actually accessing a binary tree one node at a time instead of doing it the harder but more efficient way with a table lookup.
Are you dealing with an existing format or are you making your own, or are you given the tree so you don't have to deal with that part?
What CPU/assembly language is this?
You might look at some open-source programs to see how they do it, such as pack (an old Unix Huffman-coding thing) or zip/gzip (uses Huffman as part of its algorithm.
Do it in C, compile and diassemble
yep. first we would need any information on the processor you are using (CPU, uC, whatever) what is the input and output width of you data and so on.

(i would noz recommend to compile and disassemble as this will produce very slow code and will most likely not work on the target chip....)

while looking - i found some code (the one below for amd - x86 can be found here: https://devel.neopsis.com/projects/yukon/browser/trunk/src/asm?rev=2)
2	BITS 64
4	SECTION .text
5	global huffCompress
6	global huffDecompress
8	%macro begin 0
9	        push rbx
10	        push rdx
11	        push r12
12	        push r14
13	        push r15
14	%endmacro
16	%macro end 0
17	        pop r15
18	        pop r14
19	        pop r12
20	        pop rdx
21	        pop rbx
22	%endmacro
24	;; u32 *huffCompress(u32 *dst, u8 *src, u8 *end, u32 *tbl)
25	huffCompress:
26	    mov     r10,rcx            ; table with huffman codes/lengths
27	    mov     r8b,-32            ; bits to go
28	    xor     r11,r11
30	.loop:
31	    mov     r11b,byte [rsi]    ; load byte
32	    mov     ecx, [r10+4*r11]   ; load huffman code/length
33	    add     r8b,cl             ; add code legth into 'r8b'
34	    jl      .nostore           ; enough space in the register?
36	    sub     cl,r8b             ; no, put in what fits in and adjust 'cl'
37	    sub     r8b,32
38	    shld    eax,ecx,cl
39	    add     cl,r8b
40	    bswap   eax
41	    mov     [rdi],eax
42	    add     rdi,4
44	.nostore:                      ; put the code into 'eax'
45	    shld    eax,ecx,cl
47	    add     rsi,1              ; advance source by one byte
48	    cmp     rsi,rdx            ; loop...
49	    jnz     .loop
51	    cmp     r8b,-32            ; extra bits to flush?
52	    jle    .noextra
54	    mov     cl,r8b
55	    neg     cl
56	    shl     eax,cl
57	    bswap   eax
58	    mov     [rdi],eax
59	    add     rdi,4
61	.noextra:
62	    mov     rax,rdi            ; how many dwords did we encode?
63	    retn
65	huffDecompress:
66	    begin
68	    mov     r11,rcx               ; shift table
69	    mov     r12,rcx
70	    add     r12,256               ; pointers to decoder tables
72	        mov     rax,0                 ; bit-position
74	.loop:
75	    mov     rbx,rax               ; copy bit-position
76	    mov     cl,al                 ; bit-position in the current qword
77	    shr     rbx,6                 ; bit-position / 64 (eg. index of current qword in src)
78	    mov     r15,[rsi+rbx*8]       ; load 1. dword
79	    mov     r14,[rsi+rbx*8+8]     ; load 2. dword
80	    bswap   r15
81	    bswap   r14
82	    shld    r15,r14,cl            ; left-shift to have the beginning on the very left position
83	    mov     r8,r15                ; working copy of r15
84	    shr     r8,32
85	    or      r8,1
86	    shl     r8,32
87	    bsr     rbx,r8                ; store MSB in rbx
88	    btr     r8,rbx                ; clear MSB from r8
89	    sub     rbx,32
90	    mov     rbx,[r12+rbx*8]       ; load pointer to the decoder table
91	    mov     cl,[rbx]              ; first byte is residual symbol length
92	    add     cl,32
93	    shr     r8,cl                 ; right-shift to extract the symbol
94	    movzx   rcx,byte [rbx+r8+1]   ; load decoded byte
95	    movzx   rbx,byte [r11+rcx]    ; load symbol length
96	    add     rax,rbx               ; add symbol length to rax
98	    mov     [rdi],cl              ; store decoded symbol
99	    add     rdi,1                 ; advance dst
101	    cmp     rdi,rdx
102	    jb      .loop
104	    end
106	    mov     rax,r10
108	    retn

Open in new window

Top Expert 2014

Since assembly language is machine/cpu architecture dependent it may help to know what machine/cpu is your target.
Mike McCrackenSenior Consultant
Most Valuable Expert 2011
Top Expert 2013

This question has been classified as abandoned and is closed as part of the Cleanup Program. See the recommendation for more details.