News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Unicode hNullString version Draft

Started by Farabi, April 08, 2011, 02:00:49 AM

Previous topic - Next topic

Farabi

Here is the Unicode version of hNullString. It is slower than expected, but better than nothing.
Dependency:
MASM32 Lib


Low Level Function

; ucFile - descriptor of one loaded text file made of 16-bit characters.
; Eight dword fields, 32 bytes in total. Byte offsets are listed because
; some procedures in this listing address the fields numerically.
ucFile struct
    nHeader     dd 0    ; +0   not referenced by the code in this listing
    lpData      dd 0    ; +4   pointer to the file contents
    lpLineTable dd 0    ; +8   pointer to the table of line-start pointers
    nSize       dd 0    ; +12  size of the file contents in bytes
    nLineCount  dd 0    ; +16  line count as computed by fucLineCount
    lpReserved  dd 0    ; +20  not referenced by the code in this listing
    lpReserved2 dd 0    ; +24  pointer to the per-line word tables (fucBuildWordTable)
    lpWordCount dd 0    ; +28  not referenced by the code in this listing
ucFile ends

; fWordS - three-dword (12-byte) word record.
; NOTE(review): no procedure in this listing reads or writes this structure;
; the field meanings below are taken from the field names only - confirm.
fWordS struct
    nHeader   dd 0      ; +0
    lpAddress dd 0      ; +4  presumably the address of the word
    nSize     dd 0      ; +8  presumably the size of the word
fWordS ends

.code



; fucGetOffsetTable - read one entry of a dword table through a 1-based index.
;
;   nPos    : 1-based entry number
;   lpTbl   : address of the dword table
;   nMaxPos : exclusive upper bound for nPos (nPos == nMaxPos is rejected)
;
; Returns eax = lpTbl[nPos-1], or -1 when nPos is zero, negative, or not
; below nMaxPos. ecx and edx are used as scratch.
fucGetOffsetTable proc uses esi edi nPos:dword,lpTbl:dword,nMaxPos:dword

    mov     edx,nMaxPos
    mov     ecx,nPos
    mov     esi,lpTbl

    cmp     ecx,edx
    jae     out_of_range            ; unsigned: nPos must stay below nMaxPos
    test    ecx,ecx
    jle     out_of_range            ; zero or sign bit set

    mov     eax,[esi+ecx*4-4]       ; entry nPos lives in slot nPos-1
    ret

out_of_range:
    or      eax,-1
    ret
fucGetOffsetTable endp

; fucGetLineLen - length in bytes of one line of a loaded file.
;
;   lpsf  : address of the ucFile descriptor
;   dwInd : 1-based line number
;
; Returns eax = byte length of the line, or 0 when dwInd is 0 or greater
; than nLineCount.
;
; For every line but the last, the length is the distance between this
; line's table entry and the next one, minus 2 bytes (one 16-bit character;
; presumably the CR that precedes the LF the next entry points at).
; The last line runs to the end of the file data (lpData + nSize).
;
; The line table is indexed directly, so the descriptor is never modified
; and no table slot beyond nLineCount is read. esi is preserved.
fucGetLineLen proc uses esi lpsf:dword,dwInd:dword

    mov     esi,lpsf
    mov     eax,dwInd
    test    eax,eax
    jz      bad_index                       ; line numbers start at 1

    mov     ecx,[esi].ucFile.nLineCount
    cmp     eax,ecx
    ja      bad_index                       ; past the last line

    mov     edx,[esi].ucFile.lpLineTable
    lea     edx,[edx+eax*4-4]               ; edx -> table slot of this line
    cmp     eax,ecx
    je      last_line

    mov     eax,[edx+4]                     ; start of the following line
    sub     eax,[edx]                       ; minus start of this line
    sub     eax,2                           ; drop the trailing 16-bit character
    ret

last_line:
    mov     eax,[esi].ucFile.lpData
    add     eax,[esi].ucFile.nSize          ; eax = end of the file data
    sub     eax,[edx]
    ret

bad_index:
    xor     eax,eax
    ret
fucGetLineLen endp

; fucWordCount - count the space-separated words in a run of 16-bit characters.
;
;   lpWord : address of the first character
;   nWord  : length of the run in bytes
;
; Returns eax = number of runs of non-space characters (only character
; code 32 is treated as a separator).
;
; The loop tests its bound after the body with a signed compare, so the
; character at lpWord is always examined, even when nWord is 0 or negative.
fucWordCount proc uses esi edi lpWord:dword,nWord:dword

    mov     esi,lpWord
    xor     eax,eax                 ; eax = words found so far
    xor     edx,edx                 ; edx = 1 while inside a word
    xor     ecx,ecx                 ; ecx = byte offset into the run

next_char:
    cmp     word ptr [esi+ecx],32
    jne     not_space
    xor     edx,edx                 ; a space closes the current word
    jmp     advance

not_space:
    test    edx,edx
    jnz     advance                 ; still inside the same word
    inc     edx                     ; first character of a new word
    inc     eax

advance:
    add     ecx,2
    cmp     ecx,nWord
    jl      next_char

    ret
fucWordCount endp

; fucGetWord - address of one word of one line.
;
;   lpucFile : address of the ucFile descriptor (word tables already built
;              by fucBuildWordTable)
;   ln       : 1-based line number
;   ind      : 1-based word number inside that line
;
; Returns eax = address of the first character of the word, or -1 when the
; line cannot be resolved, the line has no words, or ind is 0 or greater
; than the number of words in the line.
;
; The line is validated through fucGetOffsetTable before anything is
; dereferenced: a -1 offset must never reach fucWordCount, which would read
; from that address.
fucGetWord proc uses esi edi lpucFile:dword,ln:dword,ind:dword
LOCAL nOffs:dword
LOCAL nLine:dword
LOCAL nWC:dword

    mov     esi,lpucFile

    invoke  fucGetOffsetTable,ln,[esi].ucFile.lpLineTable,[esi].ucFile.nLineCount
    cmp     eax,-1
    je      no_word                         ; ln is outside the line table
    mov     nOffs,eax

    invoke  fucGetLineLen,esi,ln
    mov     nLine,eax

    invoke  fucWordCount,nOffs,nLine
    mov     nWC,eax
    test    eax,eax
    jz      no_word                         ; nothing but spaces on this line

    mov     ecx,ind
    test    ecx,ecx
    jz      no_word                         ; word numbers start at 1
    cmp     ecx,eax
    ja      no_word                         ; past the last word of the line

    mov     edx,[esi].ucFile.lpReserved2    ; array of per-line word tables
    test    edx,edx
    jz      no_word                         ; word tables were never built
    mov     eax,ln
    mov     edx,[edx+eax*4-4]               ; word table of line ln
    test    edx,edx
    jz      no_word                         ; no table stored for this line
    mov     eax,[edx+ecx*4-4]               ; start address of word ind
    ret

no_word:
    or      eax,-1
    ret
fucGetWord endp

; fucWordScaner - record where each space-separated word of a line begins.
;
;   lpWord  : address of the first 16-bit character of the line
;   nWord   : length of the line in bytes
;   lpTable : dword table that receives one pointer per word start
;
; Returns eax = number of words that were closed by a following space.
; A word that runs up to the end of the scanned range still has its start
; stored in the table but is not included in the returned count.
;
; Only character code 32 is treated as a separator. The loop tests its
; bound after the body with a signed compare, so the first character is
; always examined.
fucWordScaner proc uses esi edi lpWord:dword,nWord:dword,lpTable:dword

    mov     esi,lpWord              ; esi walks the characters
    mov     edi,lpTable
    xor     eax,eax                 ; eax = closed words = next free table slot
    xor     edx,edx                 ; edx = 1 while inside a word
    xor     ecx,ecx                 ; ecx = byte offset, compared with nWord

scan:
    cmp     word ptr [esi],32
    jne     not_space

    test    edx,edx
    jz      advance                 ; space outside a word: nothing to close
    xor     edx,edx
    inc     eax                     ; the word that just ended is complete
    jmp     advance

not_space:
    test    edx,edx
    jnz     advance                 ; still inside the same word
    inc     edx
    mov     [edi+eax*4],esi         ; remember where this word starts

advance:
    add     esi,2
    add     ecx,2
    cmp     ecx,nWord
    jl      scan

    ret
fucWordScaner endp

; fucWordLen - length in bytes of one word of one line.
;
;   lpucFile : address of the ucFile descriptor
;   ln       : 1-based line number
;   nIndex   : 1-based word number inside that line
;
; Returns in eax:
;   - the whole line length when the line holds exactly one word;
;   - 0 when the line or the word cannot be resolved;
;   - otherwise the distance from the start of this word to the start of
;     the next word, or, for the last word, to the end of the line
;     (line start + line length). Trailing spaces are therefore included.
fucWordLen proc uses esi edi lpucFile:dword,ln:dword,nIndex:dword
LOCAL lpos1:dword
LOCAL nOffs:dword
LOCAL nLine:dword

    mov     esi,lpucFile

    invoke  fucGetOffsetTable,ln,[esi].ucFile.lpLineTable,[esi].ucFile.nLineCount
    cmp     eax,-1
    je      no_length                       ; ln is outside the line table
    mov     nOffs,eax

    invoke  fucGetLineLen,esi,ln
    mov     nLine,eax

    invoke  fucWordCount,nOffs,nLine
    cmp     eax,1
    jne     several_words
    mov     eax,nLine                       ; a single word spans the whole line
    ret

several_words:
    invoke  fucGetWord,esi,ln,nIndex
    cmp     eax,-1
    je      no_length
    mov     lpos1,eax

    mov     ecx,nIndex
    inc     ecx
    invoke  fucGetWord,esi,ln,ecx           ; start of the following word
    cmp     eax,-1
    je      last_word
    sub     eax,lpos1
    ret

last_word:
    mov     eax,nOffs
    add     eax,nLine                       ; eax = end of the line
    sub     eax,lpos1
    ret

no_length:
    xor     eax,eax
    ret
fucWordLen endp

; fucBuildWordTable - build one word-start table per line of a loaded file.
;
;   lpucFile : address of a ucFile descriptor whose lpData, lpLineTable,
;              nSize and nLineCount are already filled in
;
; Allocates an array of nLineCount dwords, stores it in lpReserved2 and
; clears it. For every line whose start fucGetOffsetTable can resolve, a
; table of (word count) dwords is allocated, stored in slot line-1 and
; filled by fucWordScaner. Slots that are not built stay 0.
;
; NOTE: fucGetOffsetTable rejects nPos == nMaxPos, so line nLineCount is
; never resolved here; only lines 1 .. nLineCount-1 receive a table.
; NOTE(review): assumes mAlloc returns 0 on failure - confirm.
;
; The return value is not meaningful.
fucBuildWordTable proc uses esi edi lpucFile:dword
LOCAL nIndex:dword
LOCAL nOffs:dword
LOCAL nLine:dword

    mov     esi,lpucFile

    mov     ecx,[esi].ucFile.nLineCount
    shl     ecx,2
    invoke  mAlloc,ecx
    mov     [esi].ucFile.lpReserved2,eax
    test    eax,eax
    jz      done

    ; clear every slot so lines without a table are recognisable as 0
    mov     edi,eax
    mov     ecx,[esi].ucFile.nLineCount
    xor     eax,eax
    rep     stosd

    mov     nIndex,1
line_loop:
    mov     ecx,nIndex
    cmp     ecx,[esi].ucFile.nLineCount
    jae     done                            ; tested before the body: 0/1-line files build nothing

    invoke  fucGetOffsetTable,ecx,[esi].ucFile.lpLineTable,[esi].ucFile.nLineCount
    cmp     eax,-1
    je      next_line                       ; never scan from an invalid offset
    mov     nOffs,eax

    invoke  fucGetLineLen,esi,nIndex
    mov     nLine,eax

    invoke  fucWordCount,nOffs,nLine
    shl     eax,2                           ; one dword per word
    invoke  mAlloc,eax
    test    eax,eax
    jz      next_line

    mov     ecx,nIndex
    mov     edx,[esi].ucFile.lpReserved2
    mov     [edx+ecx*4-4],eax               ; slot line-1 -> this line's word table
    invoke  fucWordScaner,nOffs,nLine,eax

next_line:
    inc     nIndex
    jmp     line_loop

done:
    ret
fucBuildWordTable endp

; fucCreateOffsetTable - fill a table with the address of every line start.
;
;   lpSou    : address of the text (16-bit characters)
;   lpTbl    : dword table to fill
;   FileSize : size of the text in bytes
;   lnCnt    : number of dword slots available in lpTbl
;
; Slot 0 receives lpSou. Every further slot receives the address of a
; character with code 10 (LF), in order of appearance; the stored address
; is that of the LF itself, not of the character after it.
;
; At most lnCnt slots are written, so a text with more LF characters than
; the table has room for cannot overrun it. Only whole 16-bit characters
; are read: an odd trailing byte is ignored and a 0-byte text is not read.
;
; Returns eax = number of slots written.
fucCreateOffsetTable proc uses esi edi lpSou:dword,lpTbl:dword,FileSize:dword,lnCnt:dword

    mov     esi,lpSou                       ; esi walks the characters
    mov     edi,lpTbl

    xor     eax,eax                         ; eax = slots written so far
    cmp     eax,lnCnt
    jae     done                            ; no room for even the first entry

    mov     [edi],esi                       ; line 1 starts at the text itself
    inc     eax

    mov     edx,FileSize
    and     edx,-2                          ; whole 16-bit characters only
    add     edx,esi                         ; edx = end of the scanned range

scan:
    cmp     esi,edx
    jae     done
    cmp     word ptr [esi],10
    jne     next_char
    cmp     eax,lnCnt
    jae     done                            ; table is full
    mov     [edi+eax*4],esi
    inc     eax
next_char:
    add     esi,2
    jmp     scan

done:
    ret
fucCreateOffsetTable endp

; fucLineCount - number of lines in a text made of 16-bit characters.
;
;   lpTheFile : address of the text
;   FileSize  : size of the text in bytes
;
; Returns eax = 1 + number of characters with code 10 (LF).
;
; LF is counted because fucCreateOffsetTable stores one table entry per LF
; plus one for the start of the text; counting the same character keeps the
; line count and the table size in agreement. For CR/LF text the result is
; the same as counting CR.
;
; Only whole 16-bit characters are read: an odd trailing byte is ignored
; and a 0-byte text is not read at all (the result is then 1).
fucLineCount proc uses esi lpTheFile:dword,FileSize:dword

    mov     esi,lpTheFile
    mov     ecx,FileSize
    and     ecx,-2                          ; whole 16-bit characters only
    add     ecx,esi                         ; ecx = end of the scanned range

    xor     eax,eax
    inc     eax                             ; a text always has at least one line
    jmp     check

scan:
    cmp     word ptr [esi],10
    jne     @f
    inc     eax
@@:
    add     esi,2
check:
    cmp     esi,ecx
    jb      scan

    ret
fucLineCount endp



; fucLoadFile - load a text file and build its line and word tables.
;
;   lpFileName : address of the zero-terminated file name
;   lpucFile   : address of the ucFile descriptor to fill in
;
; Fills lpData, nSize, nLineCount and lpLineTable, then calls
; fucBuildWordTable to fill lpReserved2.
;
; Returns eax = 1 on success, 0 when the file does not exist, cannot be
; read, or the line table cannot be allocated.
;
; Notes on the MASM32 library calls (see the MASM32 Library Reference):
;   - exist returns 0 when the file is missing, so that is the value tested.
;   - read_disk_file allocates the buffer itself and stores its address and
;     the number of bytes read through its 2nd and 3rd arguments; no buffer
;     is allocated here beforehand.
; NOTE(review): assumes mAlloc returns 0 on failure - confirm.
fucLoadFile proc uses esi edi lpFileName:dword,lpucFile:dword
LOCAL nFile:dword

    mov     esi,lpucFile                    ; esi survives the calls below

    invoke  exist,lpFileName
    .if eax==0
        invoke  MessageBox,0,CADD("File not Found"),0,0
        xor     eax,eax
        ret
    .endif

    invoke  read_disk_file,lpFileName,addr [esi].ucFile.lpData,addr nFile
    .if eax==0
        ret                                 ; read failed, eax is already 0
    .endif
    mov     eax,nFile
    mov     [esi].ucFile.nSize,eax

    invoke  fucLineCount,[esi].ucFile.lpData,nFile
    mov     [esi].ucFile.nLineCount,eax

    shl     eax,2                           ; one dword per line
    invoke  mAlloc,eax
    mov     [esi].ucFile.lpLineTable,eax
    .if eax==0
        ret
    .endif

    invoke  fucCreateOffsetTable,[esi].ucFile.lpData,[esi].ucFile.lpLineTable,nFile,[esi].ucFile.nLineCount

    invoke  fucBuildWordTable,esi

    mov     eax,1
    ret
fucLoadFile endp



Example of how to use it
; ScanUnique - example: write every distinct word of a loaded file to
; "Result.txt", one word per line.
;
;   lpFile : address of a ucFile descriptor filled in by fucLoadFile
;
; The result buffer starts with the 0FEFFh byte-order mark. Each word is
; trimmed with ucRtrim and looked up in the result with ucFind; when ucFind
; returns 0 the word is appended, followed by a 0Dh character. The 4-byte
; separator write also leaves a zero character behind the last entry, which
; keeps the result zero-terminated for the next search.
;
; Safety measures:
;   - the result buffer is 8 bytes larger than the source, leaving room for
;     the mark, the separator overhang and a terminator;
;   - a terminator is written right after the mark, so the first search
;     does not depend on the allocator clearing memory;
;   - lines whose offset is -1 and words whose address is -1 are skipped;
;   - the copied word length is limited to the scratch buffer, keeping one
;     zero character at its end; non-positive lengths are skipped.
;
; NOTE: both loops use 1-based counters with "jl", so the last line and the
; last word of a multi-word line are not visited.
; NOTE(review): assumes mAlloc returns 0 on failure - confirm. The result
; buffer is not released; no matching free routine is visible here.
SCAN_BUF_BYTES  equ 1024                    ; size of buff in bytes
SCAN_MAX_COPY   equ SCAN_BUF_BYTES-2        ; leaves room for a zero character

ScanUnique proc uses esi edi lpFile:dword
LOCAL rFile:dword
LOCAL fInd:dword
LOCAL nOffs:dword
LOCAL nLine:dword
LOCAL nWC:dword
LOCAL nOW:dword
LOCAL wInd:dword
LOCAL wLen:dword
LOCAL rOff:dword
LOCAL buff[256]:dword

    mov     esi,lpFile
    mov     ecx,[esi].ucFile.nSize
    add     ecx,8
    invoke  mAlloc,ecx
    test    eax,eax
    jz      done

    mov     dword ptr [eax],0FEFFh          ; byte-order mark + zero terminator
    add     eax,2
    mov     rFile,eax                       ; rFile -> first result character
    mov     rOff,eax                        ; rOff  -> where the next word goes

    mov     fInd,1
loop_each_line:
    invoke  fucGetOffsetTable,fInd,[esi].ucFile.lpLineTable,[esi].ucFile.nLineCount
    cmp     eax,-1
    je      next_line                       ; line cannot be resolved
    mov     nOffs,eax
    invoke  fucGetLineLen,esi,fInd
    mov     nLine,eax
    invoke  fucWordCount,nOffs,nLine
    mov     nWC,eax

    .if eax==0 ; If word count is 0, it means there is no word.
        invoke  dw2a,nWC,addr buff
        invoke  MessageBox,0,addr buff,0,0
        jmp     next_line
    .endif

    mov     wInd,1
loop_each_word:
    invoke  fucGetWord,esi,fInd,wInd
    cmp     eax,-1
    je      next_word                       ; no such word
    mov     nOW,eax

    invoke  fucWordLen,esi,fInd,wInd
    cmp     eax,0
    jle     next_word                       ; nothing sensible to copy
    cmp     eax,SCAN_MAX_COPY
    jbe     length_ok
    mov     eax,SCAN_MAX_COPY               ; never write past buff
length_ok:
    mov     wLen,eax

    invoke  memfill,addr buff,SCAN_BUF_BYTES,0
    invoke  MemCopy,nOW,addr buff,wLen
    invoke  ucRtrim,addr buff,addr buff

    invoke  ucFind,1,rFile,addr buff
    .if eax==0
        invoke  ucLen,addr buff
        shl     eax,1                       ; characters -> bytes
        mov     wLen,eax
        invoke  MemCopy,addr buff,rOff,wLen
        mov     eax,wLen
        add     rOff,eax
        invoke  MemCopy,CADD(0dh,0h,00h,00),rOff,4
        add     rOff,2                      ; keep the 0Dh, let the zero be overwritten
    .endif

next_word:
    inc     wInd
    mov     ecx,wInd
    cmp     ecx,nWC
    jl      loop_each_word

next_line:
    inc     fInd
    mov     ecx,fInd
    cmp     ecx,[esi].ucFile.nLineCount
    jl      loop_each_line

    sub     rFile,2                         ; back to the byte-order mark
    mov     ecx,rOff
    sub     ecx,rFile                       ; ecx = bytes to write
    invoke  write_disk_file,CADD("Result.txt"),rFile,ecx

done:
    ret
ScanUnique endp



What you should put in your code
; Minimal usage: declare one ucFile descriptor, load a file into it, then
; run the ScanUnique example on it. The descriptor type is ucFile, the
; structure declared in this listing; no type named fucFile is defined.
.data
fq ucFile <?>
.code
invoke fucLoadFile,CADD("YourUnicodeFile.txt"),addr fq
invoke ScanUnique,addr fq


This is a draft; I can make it faster, but not right now. Please tell me if it does not work on your machine or if it causes a crash; it works on my XP machine.

[edit] The previous version only allowed you to use an Arabic font; this one can use any Unicode text.
Those who had universe knowledges can control the world by a micro processor.
http://www.wix.com/farabio/firstpage

"Etos siperi elegi"