Hi everyone,
While we are waiting for Japheth to come back I have decided to make some changes to JWasm6
I suggested these changes to Japheth a few months ago, but he resisted because of
the danger of misusing the stack in programming
In my opinion an assembler programmer must be able to take care of everything
and we can use local variables or registers instead of pushing and popping a stack
I have tested it on complex files and it works beautifully
here is how I have done it:
changes to JWasm v2.06e source
file: proc.c
line 413
#if AMD64_SUPPORT
/* adjust start displacement for Win64 FRAME procs.
* v2.06: the list may contain xmm registers, which have size 16!
*/
if ( info->isframe ) {
uint_16 *regs = info->regslist;
/* byte totals of the listed registers: 8 per non-XMM register, 16 per XMM register */
int sizestd = 0;
int sizexmm = 0;
if ( regs )
/* the first list entry is the number of registers; the registers follow it */
for( cnt = *regs++; cnt; cnt--, regs++ )
if ( GetValueSp( *regs ) & OP_XMM )
sizexmm += 16;
else
sizestd += 8;
displ = sizexmm + sizestd;
/* add 8 bytes of padding only when XMM registers are listed AND the
* non-XMM total is not a multiple of 16 */
if (( sizestd & 0xf ) && sizexmm) // line 428: this condition is the poster's fix for the former bug
displ += 8;
}
#endif
file: invoke.c
line 148
static void ms64_fcstart( struct dsym *proc, int numparams, int start, struct asm_tok tokenarray[], int *value )
/**************************************************************************************************************/
{
/* v2.04: VARARG didn't work */
if ( proc->e.procinfo->is_vararg ) {
//numparams = ( tokenarray[start].token != T_FINAL ? 1 : 0 );
for ( numparams = 0; tokenarray[start].token != T_FINAL; start++ )
if ( tokenarray[start].token == T_COMMA )
numparams++;
}
DebugMsg1(("ms64_fcstart(%s, numparams=%u) vararg=%u\n", proc->sym.name, numparams, proc->e.procinfo->is_vararg ));
if ( numparams < 4 )
numparams = 4;
else if ( numparams & 1 )
numparams++;
*value = numparams;
//AddLineQueueX( " sub %r, %d", T_RSP, numparams * 8 ); //here we prevent invoke to alter the stack
return;
}
line 170
static void ms64_fcend( struct dsym *proc, int numparams, int value )
/*******************************************************************/
{
/* <value> was set by ms64_fcstart(); in this modified version it is no
 * longer used here, because the stack adjustment below is disabled and
 * the function therefore queues no line at all. */
//AddLineQueueX( " add %r, %d", T_RSP, value * 8 ); // disabled: INVOKE must not alter the stack
return;
}
;-------------------------------------------------------------------------------------
;here is a modified simple example from Japheth's site how to use the modified version of JWasm.exe
option casemap:none
option win64:1
option frame:auto
.nolist
.nocref
WIN32_LEAN_AND_MEAN EQU 1
_WIN64 EQU 1
include windows.inc
.list
.cref
includelib kernel32.lib
includelib user32.lib
WinMain proto :HINSTANCE, :HINSTANCE, :LPSTR, :UINT
;mov mem to mem instead of PUSH, POP
;M1 = destination, M2 = source; the value is moved with two MOVQ
;instructions through xmm4, so xmm4 is overwritten by every expansion
m2m MACRO M1, M2
movq xmm4,M2
movq M1,xmm4
ENDM
;mov ADDR to MEMORY
;mem = destination operand, adr = operand whose address is taken with LEA;
;rax is used as the temporary, so rax is overwritten by every expansion
a2m MACRO mem,adr
lea rax,adr
mov mem,rax
ENDM
.const
.data?
hInstance HINSTANCE ?
CommandLine LPSTR ?
.data
ClassName db "SimpleWinClass",0
AppName db "Our First Window",0
.code
; Start procedure of the program (it is the symbol named on the END line).
; Stores the module handle and the command line in the globals hInstance
; and CommandLine, calls WinMain and hands eax to ExitProcess.
; There is no RET; presumably ExitProcess never returns - TODO confirm.
WinMainCRTStartup proc FRAME
local shadows[4]:QWORD ;shadow space to accommodate all calls in this function, minimum 4 QWORDs
;it has to be the last local var
invoke GetModuleHandleA, NULL
mov hInstance, rax
invoke GetCommandLineA
mov CommandLine, rax
invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
;eax = value left in rax by WinMain
invoke ExitProcess, eax
WinMainCRTStartup endp
;-------------------------------------------------------------------------------------
; Fills a WNDCLASSEXA, registers it, creates and shows one window and then
; runs the message loop. Returns msg.wParam in rax.
; Because the modified INVOKE no longer adjusts rsp, the argument area for
; every call made here is the local array "shadows".
WinMain proc FRAME hInst:HINSTANCE, hPrevInst:HINSTANCE, CmdLine:LPSTR, CmdShow:UINT
local wc:WNDCLASSEXA
local msg:MSG
local hwnd:HWND
local shadows[12]:QWORD ;shadow space to accommodate all calls in this function
;CreateWindowExA has 12 parameters
mov hInst, rcx
mov wc.cbSize, SIZEOF WNDCLASSEXA
mov wc.style, CS_HREDRAW or CS_VREDRAW
;a2m overwrites rax only, so rcx still holds the first argument below
a2m wc.lpfnWndProc, WndProc
mov wc.cbClsExtra, NULL
mov wc.cbWndExtra, NULL
mov wc.hInstance, rcx
mov wc.hbrBackground, COLOR_WINDOW+1
mov wc.lpszMenuName, NULL
a2m wc.lpszClassName, ClassName
invoke LoadIconA, NULL, IDI_APPLICATION
;the same icon handle is used for the large and the small icon
mov wc.hIcon, rax
mov wc.hIconSm, rax
invoke LoadCursorA, NULL, IDC_ARROW
mov wc.hCursor,rax
invoke RegisterClassExA, addr wc
invoke CreateWindowExA, NULL, ADDR ClassName, ADDR AppName,\
WS_OVERLAPPEDWINDOW, CW_USEDEFAULT,\
CW_USEDEFAULT, CW_USEDEFAULT,CW_USEDEFAULT, NULL, NULL,\
hInst, NULL
mov hwnd,rax
invoke ShowWindow, hwnd, SW_SHOWNORMAL
invoke UpdateWindow, hwnd
;message loop: left as soon as GetMessageA leaves zero in rax
.while (1)
invoke GetMessageA, ADDR msg, NULL, 0, 0
.break .if (!rax)
invoke TranslateMessage, ADDR msg
invoke DispatchMessageA, ADDR msg
.endw
mov rax, msg.wParam
ret
WinMain endp
;-------------------------------------------------------------------------------------
; Window procedure.
; ABI:  Microsoft x64 - the arguments are read directly from the registers
;       they arrive in: rcx = hWnd, edx = uMsg, r8 = wParam, r9 = lParam.
; Out:  rax = 0 for WM_DESTROY, otherwise whatever DefWindowProcA leaves
;       in rax.
; The "shadows" array is the argument area for the calls made here, because
; the modified INVOKE no longer adjusts rsp.
WndProc proc FRAME hWnd:HWND, uMsg:UINT, wParam:WPARAM, lParam:LPARAM
    local shadows[4]:QWORD      ;shadow space to accommodate all calls in this function, minimum 4 QWORDs
    .if (edx == WM_DESTROY)
        invoke PostQuitMessage, NULL
        xor eax,eax             ;rax = 0: a 32-bit write clears the upper half too, and the encoding is shorter
    .else
        ;all other messages are forwarded unchanged
        invoke DefWindowProcA, rcx, edx, r8, r9
    .endif
    ret
WndProc endp
end WinMainCRTStartup
regards
hi,
that's nice.
I've also asked (http://sourceforge.net/tracker/?func=detail&aid=3133718&group_id=255677&atid=1126896) Japheth to add an option that allows this. He said that the current implementations of INVOKE and/or PROC are not final - let's see what he does when he comes back to JWasm.
(Automatic calculation and allocation of the required shadow space, based on the INVOKEs used, would also be a nice feature)
qWord
thanks qWord,
you are right about automatic calculation
I have been studying how to implement it and,
if Japheth doesn't come back soon, I will do it myself
however, for now this version works fine for me and
it will be easy to remove local shadows ones when automatic calculation is done
I wish that the source of version 2.07 were available, because some improvements have been made in it,
although there is still that error at line 428, which calculates the wrong stack alignment
regards
From what I've seen trying to use jwasm so-far there are a few things that need to be done.
(Ive spoken to Japheth about them as well but he's seriously time constrained to get round to any of this).
Here is the state of things as I understand it:
1) The ABI says that at least 4 qwords should be reserved as shadow space for parameters. This is where it doesn't make sense to me (and unless I'm missing something, the actual ABI is wrong)...
parameters are passed in RCX,RDX,R8,R9 .. for integers/pointers... floats in XMM0-XMM3 ... so surely there should be more space reserved to handle shadowing float/double params?
2) We really need the ability to align LOCALS (automatically for qwords,dwords etc) but explicitly for structs so that we can say LOCAL mySIMDVar:_m128:ALIGN 16 (and the rsp is accounted for correctly).
3) JWASM needs to ensure that RSP is aligned 16 on start and stays that way through all procedures to allow 2 to work.
4) Invoke shouldn't touch the stack at all, invokes parameters should be summed up and the shadow space reserved accordingly in the parent proc.
5) I think that in some cases jwasm isn't inserting things into the stack space correctly to ensure alignment of qwords, dwords etc..
My main concern with all of the above while testing is that we should try to emulate the output of VC as closely as possible. At present I cannot use profilers or the Visual Studio debugger with 64-bit JWasm apps, as the locals and parameters aren't being picked up where VS expects them to be in the stack frame.
In the posted code update above, maxparams isn't defined anywhere, I just made it static int maxparams = 0 ? in the same file...
Compiles fine with PellesC + Makefile.. however when i now try to assemble the source (as you provided) or any other I get this error:
test64.asm(51) : Error A2172: Initializer magnitude too large
and line 51 is:
WinMainCRTStartup endp
??
Ok, I tried to re-compile the original (un-modded) source and that same error occurred. Thats with Pelles C + make.
I switched to using the MSVC.Mak with VS2010, no problems at all.. compiles perfectly, and the jwasm exe assembles the provided source fine.
However.. I still see no locals,params at all in Visual Studio :(
Hi johnsa,
you can not see them because JWasm doesn't support them yet
CodeView V8 symbolic debugging information for output format COFF is in the pipeline for the next release
for now you have to put them in registers for debugging purpose
and when everything works fine you can put them back in to variables
I am still working on automatic calculation and allocation of required shadow space
when I finish I will post it here
regards
Quote from: johnsa on March 01, 2012, 11:31:35 AM
1) The ABI says that at least 4 qwords should be reserved as shadow space for parameters. Where this doesn't make sense to me (and unless i'm missing something the actually ABI is wrong)...
parameters are passed in RCX,RDX,R8,R9 .. for integers/pointers... floats in XMM0-XMM3 ... so surely there should be more space reserved to handle shadowing float/double params?
Perhaps I've misunderstood the ABI then, as I read it only the 4 integer registers are copied to shadow space and that only 32 bytes must be reserved, with additional space used by pushing parameters onto the stack when the number of parameters exceeds 4. Floating point and doubles are not copied to shadow space at all and are accessed directly from the XMM registers, the corresponding integer register is ignored but its slot is still used (the register value is ignored). The diagram for x64 stack usage shows this quite clearly.
http://msdn.microsoft.com/en-us/library/ew5tede7.aspx
(http://i.msdn.microsoft.com/dynimg/IC386808.png)
Edgar
Donkey, I agree with what you're saying the ABI does make it clear that ONLY the integers are shadowed, which means a reservation on the stack of 4*8... my point is that to me the actual abi is wrong..
imagine..
myProc proc var1:REAL4, var2:REAL4, var3:REAL4, var4:REAL4
fld dword ptr var1
fmul FP4(10.0)
fstp dword ptr var1
ret
myProc endp
for example...
this is now totally cocked because of the abi,
fld dword ptr xmm0 isn't going to work very well :)
granted, knowing that its in xmm0 means you could code around it.. but to be honest i think the fastcall convention was a really stupid decision on MS's part.. it adds unnecessary complexity and confusion, and doesn't really offer any real speed benefit.. what would have made more sense is to use a modified stdcall that does the same smart stack allocation (ie: max called param count) and then used movs to put the values on stack instead of push/pop and procedures access all params from the stack as per stdcall.
As for JWasm not supporting CodeView V8 symbolic debug info... I've been getting some form of results thus-far:
c:\jwasm\jwasm -c -Zi -Zd -win64 -Zp8 test64.asm
link /subsystem:windows /debug /pdb:test64.pdb /Libpath:"C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\Lib\x64" /Libpath:"c:\masm32\lib" test64.obj
When viewed in visual studio 2010 debug mode gives me the attached scrn-shot. You'll see there are params and locals listed, their values are just bogus because VS can't determine the correct stack location for them.
Oddly however, when i assemble another file exactly the same way.. i then don't get any local/params at all... its a bit flaky.
(//)
I see it's says v8 support for next release on the jwasm site... i wonder if that means 2.07 or 2.08... if it's not in 2.07 i don't think i can wait much longer... :(
Really am going to land up with 2 options here, move to porting my app to C/C++ 64bit (yuck) ... or write my own assembler.. neither option really appeals to me..
Looking at some more VC disasm.. it appears C shadows the floats etc. to the stack as well.. to ensure things like fld will still work..
This seems to be in contravention to the ABI but makes perfect sense..
void MyFunc2(float a)
{
000000013FEE1610 movss dword ptr [rsp+8],xmm0
000000013FEE1616 push rdi
a = a + 1.0f;
000000013FEE1617 movss xmm0,dword ptr [a]
000000013FEE161D addss xmm0,dword ptr [__real@3f800000 (13FEE78FCh)]
000000013FEE1625 movss dword ptr [a],xmm0
}
000000013FEE162B pop rdi
000000013FEE162C ret
That behaviour is specified in the ABI, C will shadow the floats because of its vararg type function calls, the float value must be available in both the XMM register and the integer register so it will always be shadowed if it is in the first 4 arguments.
http://msdn.microsoft.com/en-us/library/dd2wa36c%28v=vs.80%29.aspx
I have successfully build what I promised (automatic calculation of shadow space)
now we don't need to use the local shadows
here are the compressed binaries
Warning for this build:
this is build of JWasm2.06e source and it was built with PellesC
it can build up to 4096 functions
every function must have RET or IRET at the end
so for example :
WinMainCRTStartup proc FRAME
invoke GetModuleHandleA, NULL
mov hInstance, rax
invoke GetCommandLineA
mov CommandLine, rax
invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
invoke ExitProcess, eax
WinMainCRTStartup endp
will not have shadow space
but
WinMainCRTStartup proc FRAME
invoke GetModuleHandleA, NULL
mov hInstance, rax
invoke GetCommandLineA
mov CommandLine, rax
invoke WinMain, hInstance, NULL, CommandLine, SW_SHOWDEFAULT
invoke ExitProcess, eax
ret
WinMainCRTStartup endp
will have
if anybody is interested in source changes I will post it here
I hope you'll enjoy
regards
Good work!
Still picking up a few things.
If i do identical asm and C codes.. the C (under VS2010 subs 288 bytes from RSP) where as the identical ASM (same params, same locals) does a sub RSP,224
So I'm not sure if it's missing some space for alignment or something..
WinMainX proc FRAME hInst:HINSTANCE, hPrevInst:HINSTANCE, CmdLine:LPSTR, CmdShow:UINT
local wc:WNDCLASSEXA
local msg:MSG
local hwnd:HWND
mov hInst, rcx
mov wc.cbSize, SIZEOF WNDCLASSEXA
mov wc.style, CS_HREDRAW or CS_VREDRAW
a2m wc.lpfnWndProc, WndProc
mov wc.cbClsExtra, NULL
mov wc.cbWndExtra, NULL
mov wc.hInstance, rcx
mov wc.hbrBackground, COLOR_WINDOW+1
mov wc.lpszMenuName, NULL
a2m wc.lpszClassName, ClassName
invoke LoadIconA, NULL, IDI_APPLICATION
mov wc.hIcon, rax
mov wc.hIconSm, rax
invoke LoadCursorA, NULL, IDC_ARROW
mov wc.hCursor,rax
invoke RegisterClassExA, addr wc
invoke CreateWindowExA, NULL, ADDR ClassName, ADDR AppName,\
WS_OVERLAPPEDWINDOW, CW_USEDEFAULT,\
CW_USEDEFAULT, CW_USEDEFAULT,CW_USEDEFAULT, NULL, NULL,\
hInst, NULL
mov hwnd,rax
invoke ShowWindow, hwnd, SW_SHOWNORMAL
invoke UpdateWindow, hwnd
.while (1)
invoke GetMessageA, ADDR msg, NULL, 0, 0
.break .if (!rax)
invoke TranslateMessage, ADDR msg
invoke DispatchMessageA, ADDR msg
.endw
mov rax, msg.wParam
ret
WinMainX endp
and the C one..
int APIENTRY _tWinMain(HINSTANCE hInstance,
HINSTANCE hPrevInstance,
LPTSTR lpCmdLine,
int nCmdShow)
{
UNREFERENCED_PARAMETER(hPrevInstance);
UNREFERENCED_PARAMETER(lpCmdLine);
// TODO: Place code here.
MSG msg;
HWND hWnd;
WNDCLASSEX wcex;
wcex.cbSize = sizeof(WNDCLASSEX);
wcex.style = CS_HREDRAW | CS_VREDRAW;
wcex.lpfnWndProc = WndProc;
wcex.cbClsExtra = 0;
wcex.cbWndExtra = 0;
wcex.hInstance = hInstance;
wcex.hIcon = LoadIcon(hInstance, MAKEINTRESOURCE(IDI_TESTC64));
wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
wcex.lpszMenuName = MAKEINTRESOURCE(IDC_TESTC64);
wcex.lpszClassName = szWindowClass;
wcex.hIconSm = LoadIcon(wcex.hInstance, MAKEINTRESOURCE(IDI_SMALL));
RegisterClassEx(&wcex);
hInst = hInstance; // Store instance handle in our global variable
hWnd = CreateWindowEx(NULL,szWindowClass, szTitle, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, NULL, hInstance, NULL);
ShowWindow(hWnd, nCmdShow);
UpdateWindow(hWnd);
return (int) msg.wParam;
}
In fact switching the C code between CreateWindow and CreateWindowEx (which has an extra param) still leaves the RSP subtraction at 120h (288).
So it would appear that it rounds it up to some sort of multiple?
If this is working 100% it would be great to get it into Japheth's 2.07 release....
All we're missing then is an option to align a local struct and codeview/debugging to work...
With both of the above the stack reservation should be ? (If I'm not mistaken)
LOCALS:
MSG(48 bytes)
HWND(8 bytes)
WNDCLASSEX(80 bytes)
PARAMS:
Maximum is 12 (min would be 4) so 12*8: 96bytes
That gives me... 232 bytes (as opposed to 224 OR 288)
It should be 240 and it is 0xF0
because 232 is 0xE8, which is badly aligned, so 8 bytes are added to make 240
and that is what I get when I look at disassembly with WinDbg
I don't know how did you get 224 except if you used the version which you compiled
please, use the precompiled version I posted here and do not need to use "AND RSP, -16"
because Jwasm takes care of stack alignment
VS2010 with 288 exaggerated, that is why we love assembly language
Japheth's 2.07 source is not available
However, as soon as it becomes I will modify it and post it (if maestro Japheth doesn't come with something better than that)
Quote from: johnsa on March 05, 2012, 09:11:09 AM
In fact switching the C code between CreateWindow and CreateWindowEx (which has an extra param) still leaves the RSP subtraction at 120h (288).
So it would appear that it rounds it up to some sort of multiple?
Hi,
CreateWindow is not a function, it's a macro. :wink
#define CreateWindowA(lpClassName, lpWindowName, dwStyle, x, y,\
nWidth, nHeight, hWndParent, hMenu, hInstance, lpParam)\
CreateWindowExA(0L, lpClassName, lpWindowName, dwStyle, x, y,\
nWidth, nHeight, hWndParent, hMenu, hInstance, lpParam)
#define CreateWindowW(lpClassName, lpWindowName, dwStyle, x, y,\
nWidth, nHeight, hWndParent, hMenu, hInstance, lpParam)\
CreateWindowExW(0L, lpClassName, lpWindowName, dwStyle, x, y,\
nWidth, nHeight, hWndParent, hMenu, hInstance, lpParam)
#ifdef UNICODE
#define CreateWindow CreateWindowW
#else
#define CreateWindow CreateWindowA
#endif // !UNICODE
Regards
Greenhorn
I get 224 bytes... using the pre-compiled jwasm.exe you've supplied.
built with:
jwasm -c -Zi -Zf -Zd -win64 test64.asm
link /machine:x64 /subsystem:windows /entry:WinMainCRTStartup /debug /Libpath:"C:\Program Files (x86)\Microsoft SDKs\Windows\v7.0A\Lib\x64" test64.obj
Dis-asm listing when debugged with VS2010...
000000013F0F1087 mov qword ptr [rsp+8],rcx
000000013F0F108C mov qword ptr [rsp+10h],rdx
000000013F0F1091 mov qword ptr [rsp+18h],r8
000000013F0F1096 mov qword ptr [rsp+20h],r9
000000013F0F109B push rbp
000000013F0F109C mov rbp,rsp
000000013F0F109F sub rsp,0E0h
Source code is as posted for the example app.
just to make it more clear (I obviously removed the shadow entries from the source):
WinMain proc FRAME hInst:HINSTANCE, hPrevInst:HINSTANCE, CmdLine:LPSTR, CmdShow:UINT
local wc:WNDCLASSEXA
local msg:MSG
local hwnd:HWND
mov hInst, rcx
000000013F091087 mov qword ptr [rsp+8],rcx
000000013F09108C mov qword ptr [rsp+10h],rdx
000000013F091091 mov qword ptr [rsp+18h],r8
000000013F091096 mov qword ptr [rsp+20h],r9
000000013F09109B push rbp
000000013F09109C mov rbp,rsp
000000013F09109F sub rsp,0E0h
000000013F0910A6 mov qword ptr [rbp+10h],rcx
mov wc.cbSize, SIZEOF WNDCLASSEXA
000000013F0910AA mov dword ptr [rbp-50h],50h
try this : -c -win64 -Zp8 -D_WIN64 -I -Zi -Zd
-Zp8 selects 8-byte structure alignment: without it the size comes to E0h, with it to F0h
here are modifications to JWasm sources
file:
globals.h
line: 675
/* global variables */
extern char maxparams;
extern char *CurrMaxParams[];
file:
invoke.c
line: 28
extern char maxparams; // this declaration is needed here only for PellesC
line: 148
static void ms64_fcstart( struct dsym *proc, int numparams, int start, struct asm_tok tokenarray[], int *value )
/**************************************************************************************************************/
{
/* v2.04: VARARG didn't work */
if ( proc->e.procinfo->is_vararg ) {
//numparams = ( tokenarray[start].token != T_FINAL ? 1 : 0 );
for ( numparams = 0; tokenarray[start].token != T_FINAL; start++ )
if ( tokenarray[start].token == T_COMMA )
numparams++;
}
DebugMsg1(("ms64_fcstart(%s, numparams=%u) vararg=%u\n", proc->sym.name, numparams, proc->e.procinfo->is_vararg ));
if ( numparams < 4 )
numparams = 4;
else if ( numparams & 1 )
numparams++;
*value = numparams;
if (numparams > maxparams ) maxparams = numparams; //if there is more arguments ajust maxparams
//AddLineQueueX( " sub %r, %d", T_RSP, numparams * 8 ); here we prevent changing the stack
return;
}
static void ms64_fcend( struct dsym *proc, int numparams, int value )
/*******************************************************************/
{
/* <value> was set by ms64_fcstart(); it is no longer used here, because
 * the stack adjustment below is disabled and the function therefore
 * queues no line at all. */
//AddLineQueueX( " add %r, %d", T_RSP, value * 8 ); // disabled: here we prevent changing the stack
return;
}
file:
proc.c
line: 77
char maxparams; /* max number of parameters in invoke calls */
char maxparams1; /* max number of parameters in invoke calls, adjusted to an even number */
char *CurrMaxParams[0x1000]; /* this array holds the max number of shadow-space
 * parameters for each function;
 * 0x1000 entries are enough to accommodate 4096 functions - change the size only if you know what you are doing */
line: 420
#if AMD64_SUPPORT
/* adjust start displacement for Win64 FRAME procs.
* v2.06: the list may contain xmm registers, which have size 16!
*/
if ( info->isframe ) {
uint_16 *regs = info->regslist;
/* byte totals of the listed registers: 8 per non-XMM register, 16 per XMM register */
int sizestd = 0;
int sizexmm = 0;
if ( regs )
/* the first list entry is the number of registers; the registers follow it */
for( cnt = *regs++; cnt; cnt--, regs++ )
if ( GetValueSp( *regs ) & OP_XMM )
sizexmm += 16;
else
sizestd += 8;
displ = sizexmm + sizestd;
/* add 8 bytes of padding only when XMM registers are listed AND the
* non-XMM total is not a multiple of 16 */
if (( sizestd & 0xf ) && sizexmm)
displ += 8;
}
#endif
line: 1980
static ret_code write_win64_default_prologue( struct proc_info *info )
/********************************************************************/
{
uint_16 *regist;
int sizestd = 0;
int sizexmm = 0;
DebugMsg1(("write_win64_default_prologue enter\n"));
PushLineQueue();
if ( ModuleInfo.win64_saveparams )
win64_SaveRegParams( info );
/*
* PUSH RBP
* .PUSHREG RBP
* MOV RBP, RSP
* .SETFRAME RBP, 0
*/
AddLineQueueX( "push %r", T_RBP );
AddLineQueueX( "%r %r", T_DOT_PUSHREG, T_RBP );
AddLineQueueX( "mov %r, %r", T_RBP, T_RSP );
AddLineQueueX( "%r %r, 0", T_DOT_SETFRAME, T_RBP );
/* after the "push rbp", the stack is xmmword aligned */
/* Push the registers */
if( info->regslist ) {
int cnt;
regist = info->regslist;
for( cnt = *regist++; cnt; cnt--, regist++ ) {
//int i;
if ( GetValueSp( *regist ) & OP_XMM ) {
sizexmm += 16;
} else {
sizestd += 8;
AddLineQueueX( "push %r", *regist );
if ( ( 1 << GetRegNo( *regist ) ) & win64_nvgpr ) {
AddLineQueueX( "%r %r", T_DOT_PUSHREG, *regist );
}
}
} /* end for */
DebugMsg1(("write_win64_default_prologue: sizestd=%u, sizexmm=%u\n", sizestd, sizexmm ));
sizestd &= 0xF; /* result will be 8 or 0. Just this amount is needed below */
#if 1
/* save xmm registers */
if ( sizexmm ) {
int i;
AddLineQueueX( "sub %r, %d", T_RSP, NUMQUAL sizexmm + sizestd );
AddLineQueueX( "%r %d", T_DOT_ALLOCSTACK, NUMQUAL sizexmm + sizestd );
sizestd = 0; /* stack is aligned now. Don't use sizestd anymore */
regist = info->regslist;
for( cnt = *regist++, i = 0; cnt; cnt--, regist++ ) {
if ( GetValueSp( *regist ) & OP_XMM ) {
AddLineQueueX( "movdqa [%r+%u], %r", T_RSP, NUMQUAL i, *regist );
if ( ( 1 << GetRegNo( *regist ) ) & win64_nvxmm ) {
AddLineQueueX( "%r %r, %u", T_DOT_SAVEXMM128, *regist, NUMQUAL i );
}
i += 16;
}
}
}
#endif
}
info->localsize = ROUND_UP( info->localsize, CurrWordSize );
/* alloc space for local variables and align the stack. */
maxparams1 = (char)CurrMaxParams[procidx];
if( info->localsize + sizestd || maxparams1 ) {
/* align the stack if necessary. */
if ( ( sizestd && (!(info->localsize & 0xF ) ) ) ||
( sizestd == 0 && (info->localsize & 0xF ) ) )
info->localsize += 8;
if(maxparams1 & 1) maxparams1++;
if(maxparams1 < 4) maxparams1 += 4;
DebugMsg1(("write_win64_default_prologue: localsize=%u, sizestd=%u\n", info->localsize, sizestd ));
/*
* SUB RSP, localsize
* .ALLOCSTACK localsize
*/
AddLineQueueX( "sub %r, %d", T_RSP, NUMQUAL info->localsize + (maxparams1 * 8));
AddLineQueueX( "%r %d", T_DOT_ALLOCSTACK, NUMQUAL info->localsize + (maxparams1 * 8));
}
AddLineQueueX( "%r", T_DOT_ENDPROLOG );
line: 2559
/*
 * Queue the default Win64 epilogue for the current procedure:
 *   - reload the XMM registers of the register list from their save area,
 *   - release locals, shadow space and the XMM save area with one ADD,
 *   - record the shadow-space requirement of this procedure in
 *     CurrMaxParams[procidx] and reset maxparams,
 *   - pop the saved general-purpose registers and the frame register.
 *
 * info: procedure info of the procedure whose epilogue is written.
 *
 * maxparams1 is the slot count the prologue used for its "sub rsp"; the
 * same value must be used here so that prologue and epilogue match.
 */
static void write_win64_default_epilogue( struct proc_info *info )
/****************************************************************/
{
    uint sizexmm = 0;
    uint sizestd = 0;

    /* restore non-volatile xmm registers */
    if ( info->regslist ) {
        uint_16 *regist = info->regslist;
        int cnt;
        for( cnt = *regist++; cnt; cnt--, regist++ ) {
            if ( GetValueSp( *regist ) & OP_XMM ) {
                /* The prologue allocates locals AND maxparams1 * 8 bytes of
                 * shadow space below the XMM save area, so both have to be
                 * skipped to reach the saved register. Without the shadow
                 * term the wrong memory was reloaded. */
                AddLineQueueX( "movdqa %r, [%r+%u]", *regist, stackreg[ModuleInfo.Ofssize], NUMQUAL info->localsize + (maxparams1 * 8) + sizexmm );
                sizexmm += 16;
            } else
                sizestd += 8;
        }
    }
    sizestd &= 0xf;
    /* v2.06: must match alignment of prologue! */
    if ( sizexmm && sizestd ) sizexmm += sizestd;
    //sprintf( buffer, "add %s, %d", GetResWName( stackreg[ModuleInfo.Ofssize], NULL ), info->localsize + sizexmm + sizestd );
    AddLineQueueX( "add %r, %d", stackreg[ModuleInfo.Ofssize], NUMQUAL info->localsize + sizexmm + (maxparams1 * 8));

    /* An epilogue is written for every RET. maxparams only covers the
     * INVOKEs since the previous RET, so keep the largest value seen for
     * this procedure instead of overwriting it; otherwise a procedure with
     * several RETs would reserve shadow space for its last segment only. */
    if ( maxparams > (char)CurrMaxParams[procidx] )
        CurrMaxParams[procidx] = (char*) maxparams;
    maxparams = 0; /* reset maxparams for the code that follows this RET */

    pop_register( CurrProc->e.procinfo->regslist );
    AddLineQueueX( "pop %r", basereg[ModuleInfo.Ofssize] );
    return;
}
and here is again JWasm.exe
Assembling with Zp8 does indeed produce 0f0h (240)
so basically it's padding any structure member less than a qword up to a qword when used as a local (on the stack) to ensure correct alignment of each member variable. Which is great.
All we need now is an option to ensure that the address (RSP or RBP as used) for a particular stack variable can be aligned. This should then ensure that we can use movaps, movdqa for any local SIMD var.
I'm not sure how people feel about the syntax of this, I think it could be specified on the LOCAL itself or on the struct definition?
I tend to prefer on the local, as it means I'm free to use the struct elsewhere without the alignment and we can create a LOCAL using a different form.
IE:
LOCAL MyVector:_m128:ALIGN 16
or
LOCAL MyVector[4]:DWORD:ALIGN 16
That and Codeview v8 support.. and I think 64bit asm is just about set!
If you look at this "static ret_code write_win64_default_prologue( struct proc_info *info )" function you will see that stack is being aligned to 16 bytes
so your wishes have been fulfilled
there is only Codeview v8 support pending and you will be fully satisfied
However, we have to wait for japheth to come back for that
It will probably take long time for him to finish that, but for now we have pretty good tools to work with
this version satisfy me fully and I am happy to have it
best regards
Hey,
The stack is aligned, but what happens with the prologue generation if I were to do this:
MyProc proc a:DWORD, b:DWORD
LOCAL a:_mm128
LOCAL b:REAL4
LOCAL c:_mm128
movdqa a,xmm0
movss b,xmm1
movaps c,xmm2
ret
MyProc endp
for example.. is it ensuring that a's address is 16 byte aligned, AND c, noting that it has a 4 byte value stuck in-between? If it's doing that already... then SERIOUS hats off to you sir :)
I guess as a work-around if need be.. you could just ensure you create all the LOCALs which need to be aligned at the beginning.. IE:
LOCAL a:_mm128
LOCAL b:_mm128
LOCAL c:REAL4
that way each is 16 bytes in sized which should maintain the stack alignment?
how does it look to you? OK?
__mm128i struct
i0 DWORD ?
i1 DWORD ?
i2 DWORD ?
i3 DWORD ?
__mm128i ends
_mm128i typedef __mm128i
__mm128f struct
f0 real4 ?
f1 real4 ?
f2 real4 ?
f3 real4 ?
__mm128f ends
_mm128f typedef __mm128f
_mm128 union
i32 _mm128i <>
f32 _mm128f <>
_mm128 ends
MyProc proc ar:DWORD, br:DWORD
LOCAL a:_mm128
LOCAL b:REAL4
LOCAL ci:_mm128
movdqa a,xmm0
movss b,xmm1
movaps ci,xmm2
ret
MyProc endp
proctest!MyProc:
00000001`40001040 48894c2408 mov qword ptr [rsp+8],rcx
00000001`40001045 4889542410 mov qword ptr [rsp+10h],rdx
00000001`4000104a 55 push rbp
00000001`4000104b 488bec mov rbp,rsp
00000001`4000104e 4883ec28 sub rsp,28h
00000001`40001052 660f7f45f0 movdqa xmmword ptr [rbp-10h],xmm0
00000001`40001057 f30f114dec movss dword ptr [rbp-14h],xmm1
00000001`4000105c 0f2955d8 movaps xmmword ptr [rbp-28h],xmm2
00000001`40001060 c9 leave
00000001`40001061 c3 ret
but it causes access violation
MyProc proc ar:DWORD, br:DWORD
LOCAL a:_mm128
LOCAL ci:_mm128
LOCAL b:REAL4
movdqa a,xmm0
movss b,xmm1
movaps ci,xmm2
ret
MyProc endp
this is OK
It looks lovely :) but.. i get an access violation when i run it..
MyProc proc ar:DWORD, br:DWORD
000000013F1E1248 mov qword ptr [rsp+8],rcx
000000013F1E124D mov qword ptr [rsp+10h],rdx
000000013F1E1252 push rbp
000000013F1E1253 mov rbp,rsp
000000013F1E1256 sub rsp,28h
LOCAL a:_mm128
LOCAL b:REAL4
LOCAL ci:_mm128
movdqa a,xmm0
000000013F1E125A movdqa xmmword ptr [rbp-10h],xmm0
movss b,xmm1
000000013F1E125F movss dword ptr [rbp-14h],xmm1
movaps ci,xmm2
000000013F1E1264 movaps xmmword ptr [rbp-28h],xmm2 ; <-- Access Violation Here.. rbp-28h isn't 16 aligned.
ret
000000013F1E1268 leave
000000013F1E1269 ret
I've asked Japheth to please integrate your changes based on this thread into 2.07, and i'm just waiting for him to come back on what debug data is currently generated in the COFF OBJ, I'm assuming its the older V4 format?
In which case what his time lines are looking like. Perhaps it would be worth the effort to build a separate utility to update the OBJ to be at least more V8 compatible if it's going to take him too long.
Swapping b and ci around fixes it: 28h = 40, which isn't divisible by 16, so if RBP is aligned to 16, RBP-28h can't be.
If we are assembler programmers we should be aware of our code's products and of our tools capability
and anyway we should check what the compiler does with our code
I hope that Japhet has got some time spare to work on the JWasm2.07
regards
Agreed. For now I can totally live with the LOCAL issue, if and when I create a simd local, i'll just group them together first (after prologue) as that way they're aligned perfectly.
I'm really hoping for a bumper 2.07 release with all of this stuff factored in plus all of his bug-fixes.
I'd even wait another month or two for it if we could get symbolic debugging.
Here is some more improvements of JWasm
When you use option win64:1, the first 4 register parameters are saved to their shadow space,
but without checking whether the parameters are actually used.
Sometimes we use only the registers directly and don't need the shadow copies;
skipping them reduces the amount of code generated and increases the speed.
This version saves only the parameters that you use in the function
here is changed source:
file:
proc.c
line 1958
/*
 * Queue the instructions that copy register parameters to their home slots
 * at [rsp+8], [rsp+16], [rsp+24] and [rsp+32].
 * At most the first four slots are handled. A parameter is stored only if
 * its symbol is flagged as used: MT_FLOAT types with MOVQ from XMM0+slot,
 * everything else with MOV from ms64_regs[slot].
 * A VARARG parameter is never stored and the list is not advanced past it,
 * so the remaining slots are skipped.
 */
static void win64_SaveRegParams( struct proc_info *info )
/*******************************************************/
{
    struct dsym *curr = info->paralist;
    int slot;

    for ( slot = 0; slot < 4 && curr; slot++ ) {
        if ( curr->sym.is_vararg != FALSE )
            continue;

        if ( curr->sym.used ) {
            /* v2.05: save XMMx if type is float/double */
            if ( curr->sym.mem_type & MT_FLOAT )
                AddLineQueueX( "movq [%r+%u], %r", T_RSP, 8 + slot * 8, T_XMM0 + slot );
            else
                AddLineQueueX( "mov [%r+%u], %r", T_RSP, 8 + slot * 8, ms64_regs[slot] );
        }
        curr = curr->nextparam;
    }
}
and here are binaries:
More good work there!
Spoke to Japheth, he's primarily focused on jwlink at present, so that's receiving any attention that his time permits. So to be honest i'm really not sure when we can expect 2.07 let alone the CV8 features. I mentioned my idea of perhaps "tweaking" the OBJ file
to be more compatible with V8, but he reckons it can only be done from inside jwasm using it's structures/tables to produce the correct output.
I asked if he would factor these changes into 2.07 (as listed in this thread) but he says he doesn't want to include any more in that release. I'm hoping at least then he will release the source so that the changes can be factored in.
I've downloaded the latest PE/COFF spec (so I'm assuming that it should conform to CV8 even though they don't mention it by name).
Are you compiling this in VC or PellesC?
I thought maybe I should put my C hatred aside and join in and see if we can start getting the CV8 output.
I know I'm insane.. but I actually use asm for some commercial work. I have a couple projects which are in excess of 100k lines, so to be able to debug stuff comfortably is quite key for me. 32bit has been beautiful, but it's time for me to start porting some of these commercial works to 64bit. Heck.. When I think how much i've paid over time for things like Visual Studio, and so-far I've had to pay nothing for my asm coding, I'd be more than happy to PAY someone to get this done quick :)
I am using both VC10 Express and PellesC, but with PellesC I run PellesC.mak from the console - not the IDE - and it does the whole job properly
let us work together
I am still looking for improvement of this source
Have you been able to get it to build inside the IDE (without make file)?
I've tried several times now with both and it's just an endless list of issues..
I did not try the PellesC IDE, only PellesC.mak from the JWasm source folder
But VC2010 Express works fine, you just have to tweak it
here is what I have:
you should be able to build it easily with PellesC.mak if you open the command line in the JWasm folder and run C:\PellesC\bin\pomake -f PellesC.mak
Hey,
Yeah I can get a perfect build from command line using the make file...
I wanted to be able to run from IDE/debug mode to actual step through and see whats going on..
Tried your solution file, gave me the same 4400 errors...
things like structs that aren't valid...
Error 2 error C2016: C requires that a struct or union has at least one member e:\jwasmsolution\globals.h 504 1 JWasm
Error 3 error C2061: syntax error : identifier 'bool' e:\jwasmsolution\globals.h 504 1 JWasm
Error 4 error C2061: syntax error : identifier 'line_numbers' e:\jwasmsolution\globals.h 505 1 JWasm
Error 5 error C2059: syntax error : ';' e:\jwasmsolution\globals.h 505 1 JWasm
Error 6 error C2061: syntax error : identifier 'warning_error' e:\jwasmsolution\globals.h 513 1 JWasm
Error 7 error C2059: syntax error : ';' e:\jwasmsolution\globals.h 513 1 JWasm
Error 8 error C2061: syntax error : identifier 'no_comment_data_in_code_records' e:\jwasmsolution\globals.h 526 1 JWasm
and so on.. 4400 times before the compiler just gave up
If you want I can send you compressed folder with my solution inside
I have your email address
I have succeeded to build debug version and it works
In the folder is solution set up for debug version
That would be great, thanks! Then I'll start investigating the CV8 stuff. Are your code changes already in the source? (Saves me re-applying them.)
they are, did you check your email?
Yeah.. nothing came through at all... you can also try meeku.jh@gmail.com
and now?
Got it thanks!
And.. with only a few mods it builds and runs from inside the IDE!
Awesome.. I'll get cracking on hunting down the stuff for CV8.
:8) good luck
I think i'm going to need it... lol
CV8 is undocumented, I see it's been partially implemented (at least some types and structs).
I found this: http://www.hackchina.com/en/r/48474/yasm-0.7.1-_-modules-_-dbgfmts-_-codeview-_-cv8.txt__html
Which is helpful.
Interesting
that was taken from yasm
I downloaded yasm source and it is there
maybe you should look how it is implemented there
regards
FYI...
http://www.fantastictimes.co.za/CodeView.pdf
http://www.fantastictimes.co.za/pecoff_v8.docx
http://www.fantastictimes.co.za/specs.html
I happen to have some very old MSDN collections lying around, and after much trawling.. found the elusive "Visual C++ 5.0 Symbolic Debug Information Specification" as linked above.
I extracted the CHM into a site there.
Still no CV8, BUT it can in theory be gleaned by updating CV5 with these notes
http://www.hackchina.com/en/r/48474/yasm-0.7.1-_-modules-_-dbgfmts-_-codeview-_-cv8.txt__html
I remembered that strap89 wrote pecvt for fasm in 2009
http://board.flatassembler.net/topic.php?p=89369
it is interesting to look here how he implemented it in fasm
here it is
Hi guys !
I see you are doing some serious work in absence of "Japheth". :wink
Could some of you make Jwasm able to switch between 64/32 bit code in the source file ?
Jwasm has a ".x64" directive, but it seems to be ignored if another directive implying 32-bit precedes it.
I wanna make some mixed code like:
; Sample posted to illustrate calling a 64-bit code fragment from a 32-bit
; MASM32 program through a hand-encoded far call.
; NOTE(review): inkey, exit, cls and print are presumably macros supplied by
; masm32rt.inc - confirm against that include file.
include \masm32\include\masm32rt.inc
.code
start:
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
call main
inkey
exit
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
main proc
; The next three data directives spell out "call 33h:proc64" byte by byte:
; opcode, 32-bit offset, 16-bit selector.
db 9ah ; far-call opcode for "call 33h:proc64" - JWasm does not accept this form, so it has to be hard-coded
dd proc64 ; offset part of the far pointer
dw 33h ; selector part of the far pointer (presumably the 64-bit code selector - confirm)
cls
print "Back to 32-bit"
ret
main endp
; 64-bit part
.x64
align 16
proc64:
xor rax, rax
retf ; far return to the caller in main
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
end start
Thanks beforehand.
Hi,
Not sure exactly what you're trying to achieve here?
You can't run a mixed 32/64 exe under windows..
If you're writing code that you'd want to use in a OS boot-loader etc, where you'd want to switch modes, jwasm should pick up what type of code to generate from the segment type.
.x64 is just going to enable assembly of 64bit instructions/registers etc depending on the output format.
So you'd use something like
Code16 segment para public use16 'code'
and Code64 segment para public use64 'code'
Theres an example file in the jwasm samples Dos64.asm ..
johnsa, thank you for answering
that is what I wanted to answer
BTW, how are you going with V8?
regards
Quote from: johnsa on March 23, 2012, 09:26:58 AM
Not sure exactly what you're trying to achieve here?
You can't run a mixed 32/64 exe under windows..
Of course you can. :toothy
See http://vxheavens.com/lib/vrg16.html
I tried with YASM - the only assembler supporting mixed code that I have been able to find so far (NASM understands "call 0x33:proc64" but can't switch between the modes; I don't know about FASM) -
it works like a charm, and I can use 64-bit regs at will. :P
x64 code in 32 bit process (http://www.masm32.com/board/index.php?topic=18556.0)
Thanks qWord! :U
I didn't know you need "x64 SEGMENT EXECUTE USE64" before ".x64"; that was the missing piece of information. :red
Since I was asking for something that is already implemented, my request above is void. :P
I had no idea you could do this!
Very interesting :)
So you'd write a normal 32bit app, run it under win x64, and just make calls for blocks of 64bit code.. much like pmode under dos you could do that once, and then have macros that thunk back to ur code32 to make API calls.. I had that in my dos extender... hmm
I'm not sure what the use of this is tho? I'd probably just stick to one or the other.. as I'm sure the call-gate has a massive performance penalty.. so it's not something you'd want to be doing often.
Quote from: johnsa on March 24, 2012, 07:23:41 PM
I'm not sure what the use of this is tho?
Good question indeed. More interesting would be the other way round: A stub in 64-bit that allows you to use your stock of 32-bit libraries...
Oh heck yeah.. that would be brilliant :)
I wonder if there's another call-gate or if that one works from 64->32..
seems to me it would be more trouble than it's worth - lol
just write the thing in 32-bit :U
i know - i am a paaaaaaaarty-pooper
if you want a real neat trick...
write some code so i can execute 64-bit stuff under a 32-bit OS, provided i have a suitable CPU :U
now, you have something to be excited about - lol
Quote from: johnsa on March 24, 2012, 07:37:11 PM
Oh heck yeah.. that would be brilliant :)
I wonder if there's another call-gate or if that one works from 64->32..
Ray Chen (http://blogs.msdn.com/b/oldnewthing/archive/2008/10/20/9006720.aspx) tries desperately to sell the message "it's not possible". One thing is sure: it's not in the interest of Microsoft and the hardware industry...
If I really had to write a wrapper for letting a 64-bit dll access my 32-bit libraries, WM_COPYDATA and memory-mapped files would be high on my list of ingredients :bg
well, that is assembler: severely restricted portability :P
Hi
I have developed this MACRO for JWasm users
it can be nested as many times as needed; JWasm does not have the nesting limit of 20
it is based on .REPEAT and .UNTIL FALSE
it can be used for x86 or x64
I think that it can also work with ML.EXE but not with ml64.exe
any improvements welcome
usage:
NO SPACES ALLOWED BETWEEN ":"
this=begin:limit:step:--next
this=begin:limit:step:++next
EXAMPLE:
;count=src:dest:2:++count
;count=0:dest:8:++count
;rbx=24:0:1:--rbx
;rax=0:24:4:++rax
;if step = 0, 'next' will be added to or subtracted from 'this'
;count=src:dest:0:--rax
;count=src:dest:0:++src
;if step = 0 and no '--' or '++', 'this' = 'next' and will stop when this == limit
;count=src:dest:0:rax
;.FOR count=src:dest:1:++count
; nop
; .break .if (!count)
; nop
; .continue .if (!src)
; nop
; .continue
; nop
; .break
; mov eax,1
;.ENDFOR
;.forever
; nop
; .break .if (!eax)
; nop
;.endfor
option dotname ;; required: lets identifiers such as the .for/.endfor macro names start with a dot
TRUE = 1
FALSE = 0 ;; used by .endfor (.until FALSE) so the loop always jumps back
;------------------------------------------------------------------------------
; .for  this=begin:limit:step:[++|--]next
;
; Opens a counted loop built on .repeat. The loop must be closed with .endfor,
; which supplies the matching .until FALSE. The argument is a single text item;
; no spaces are allowed around the = and : separators.
;
;   this    loop variable (register or memory operand)
;   begin   initial value copied into the loop variable
;   limit   bound tested before every iteration, including the first one
;   step    amount added (++) or subtracted (--) on each pass; when it
;           evaluates to 0 the value of next is added/subtracted instead
;   next    with a ++ or -- prefix: fallback increment (see step);
;           without a prefix: value loaded into the loop variable on each pass
;
; Termination test:
;   ++      leave the loop when the loop variable is above limit
;   --      leave the loop when the loop variable is below limit
;   none    leave the loop when the loop variable equals limit
;
; Clobbers: when the loop variable is not a register, rax (if _WIN64 is
;           defined) or eax is used as scratch on every pass.
; Requires: OPTION DOTNAME.
;
; NOTE(review): the sample listings show the generated compare as unsigned
; (jb), so a -- loop whose limit is 0 presumably never satisfies the
; "below limit" test - confirm before relying on it.
;------------------------------------------------------------------------------
.for MACRO these:vararg
LOCAL pm,mreg,rg,this,begin,step,limit,next,column1,column2,column3,iseq
LOCAL startloop
    ;; ---- split the argument text at the equal sign and the three colons ----
    iseq EQU @InStr(1,<these>,<=>)
    this TEXTEQU @SubStr(<these>,1, iseq-1)
    column1 EQU @InStr(iseq+1,<these>,<:>)
    begin TEXTEQU @SubStr(<these>,iseq+1,column1-iseq-1)
    column2 EQU @InStr(column1+1,<these>,<:>)
    limit TEXTEQU @SubStr(<these>,column1+1,column2-column1-1)
    column3 EQU @InStr(column2+1,<these>,<:>)
    step TEXTEQU @SubStr(<these>,column2+1,column3-column2-1)
    ;; first two characters of the last field select the direction
    pm TEXTEQU @SubStr(<these>,column3+1,2)
    IFIDNI pm, <++>
        next TEXTEQU @SubStr(<these>,column3+3)
    ELSEIFIDNI pm, <-->
        next TEXTEQU @SubStr(<these>,column3+3)
    ELSE
        next TEXTEQU @SubStr(<these>,column3+1)
    ENDIF

    ;; ---- classify the loop variable ----
    ;; OPATTR value 48 = register expression (bit 4) with no undefined symbols (bit 5)
    rg = (OPATTR this) AND 127 ;;checking if 'this' is register
    IF rg NE 48
        rg = 0 ;;not register
    ELSE
        rg = 1 ;;yes
    ENDIF
    ;; a memory loop variable needs a scratch register for mem-to-mem moves
    IFE rg
        IFDEF _WIN64
            mreg TEXTEQU <rax>
        ELSE
            mreg TEXTEQU <eax>
        ENDIF
    ENDIF

    ;; ---- initialise the loop variable ----
    ;; NOTE(review): whether the text macro is expanded inside the angle
    ;; brackets of this IFIDN is assembler-dependent; confirm that the
    ;; zero-begin branch is reachable at all.
    IFIDN <begin>,<0>
        IF rg
            mov this,next
        ELSE
            mov mreg,next
            mov this,mreg
        ENDIF
    ELSE
        IF rg
            mov this,begin
        ELSE
            mov mreg,begin
            mov this,mreg
        ENDIF
    ENDIF
    ;; skip the update on the first pass and go straight to the limit test
    jmp startloop

    .repeat ;;here is the LOOP
    ;; ---- per-iteration update (.continue lands here via .endfor) ----
    IFIDN pm, <++>
        IF rg
            IFE step
                add this,next
            ELSE
                add this,step
            ENDIF
        ELSE
            mov mreg,this
            IFE step
                add mreg,next
            ELSE
                add mreg,step
            ENDIF
            mov this,mreg
        ENDIF
    ELSEIFIDN pm, <-->
        IF rg
            IFE step
                sub this,next
            ELSE
                sub this,step
            ENDIF
        ELSE
            mov mreg,this
            IFE step
                sub mreg,next
            ELSE
                sub mreg,step
            ENDIF
            mov this,mreg
        ENDIF
    ELSE
        IF rg
            mov this,next
        ELSE
            mov mreg,next
            mov this,mreg
        ENDIF
    ENDIF

    ;; ---- termination test; for a memory loop variable the scratch
    ;; ---- register still holds its current value at this point
startloop:
    IFIDN pm, <++>
        ;; ascending: leave once the counter has moved past the limit
        IF rg
            .break .if (this > limit)
        ELSE
            .break .if (mreg > limit)
        ENDIF
    ELSEIFIDN pm, <-->
        ;; descending: leave once the counter has dropped below the limit
        IF rg
            .break .if (this < limit)
        ELSE
            .break .if (mreg < limit)
        ENDIF
    ELSE
        ;; no direction prefix: leave when the counter reaches the limit value
        IF rg
            .break .if (this == limit)
        ELSE
            .break .if (mreg == limit)
        ENDIF
    ENDIF
ENDM
; .forever - opens a loop with no counter and no termination test.
; It only emits .repeat; close it with .endfor and leave it with .break.
.forever MACRO
.repeat
ENDM
; .endfor - closes a loop opened by .for or .forever.
; The condition is the constant FALSE, so the .until never exits by itself.
.endfor MACRO
.until FALSE ;;LOOP forever
ENDM
; Upper-case and capitalised spellings of the three loop macros.
; NOTE(review): presumably provided for case-sensitive assembly
; (e.g. OPTION CASEMAP:NONE) - confirm.
.FOREVER EQU .forever
.Forever EQU .forever
.FOR EQU .for
.ENDFOR EQU .endfor
.For EQU .for
.Endfor EQU .endfor
_________________________
I added cool smileys to this message... if you don't see them go to: http://s.exps.me
_________________________
I added cool smileys to this message... if you don't see them go to: http://s.exps.me
I have fixed some bugs and now everything works properly
here is how it looks expanded by assembler:
.FOR count=src:dest:1:++count
nop
.break .if (!count)
nop
.continue .if (!src)
nop
.continue
nop
.break
mov eax,1
.ENDFOR
.for rsi=src:0:0:[rsi].AXPOINT.next
nop
.break
nop
.endfor
.forever
nop
.break .if (!eax)
nop
.endfor
;--------------------------------------------------------------
1080: .FOR count=src:dest:1:++count
000000000044B019 mov rax,qword ptr [rbp+18h]
000000000044B01D mov qword ptr [rbp+20h],rax
000000000044B021 jmp somefunction+29h (44B02Fh)
000000000044B023 mov rax,qword ptr [rbp+20h]
000000000044B027 add rax,1
000000000044B02B mov qword ptr [rbp+20h],rax
000000000044B02F cmp rax,qword ptr [rbp+10h]
000000000044B033 jb somefunction+4Ch (44B052h)
1081: nop
000000000044B035 nop
1082: .break .if (!count)
000000000044B036 cmp qword ptr [rbp+20h],0
000000000044B03B je somefunction+4Ch (44B052h)
1083: nop
000000000044B03D nop
1084: .continue .if (!src)
000000000044B03E cmp qword ptr [rbp+18h],0
000000000044B043 je somefunction+4Ah (44B050h)
1085: nop
000000000044B045 nop
1086: .continue
000000000044B046 jmp somefunction+4Ah (44B050h)
1087: nop
000000000044B048 nop
1088: .break
000000000044B049 jmp somefunction+4Ch (44B052h)
1089: mov eax,1
000000000044B04B mov eax,1
1090: .ENDFOR
000000000044B050 jmp somefunction+1Dh (44B023h)
;--------------------------------------------------------------
1092: .for rsi=src:0:0:[rsi].AXPOINT.next
000000000044B052 mov rsi,qword ptr [rbp+18h]
000000000044B056 jmp somefunction+55h (44B05Bh)
000000000044B058 mov rsi,qword ptr [rsi]
000000000044B05B or rsi,rsi
000000000044B05E je somefunction+60h (44B066h)
1093: nop
000000000044B060 nop
1094: .break
000000000044B061 jmp somefunction+60h (44B066h)
1095: nop
000000000044B063 nop
1096: .endfor
000000000044B064 jmp somefunction+52h (44B058h)
;--------------------------------------------------------------
1097: .forever
1098: nop
000000000044B066 nop
1099: .break .if (!eax)
000000000044B067 and eax,eax
000000000044B069 je somefunction+68h (44B06Eh)
1100: nop
000000000044B06B nop
1101: .endfor
000000000044B06C jmp somefunction+60h (44B066h)
;--------------------------------------------------------------
1104: .if (rcx != rdx)
000000000044B06E cmp rcx,rdx
some more examples:
1080: .FOR count=src:dest:8:--count
000000000044B019 mov rax,qword ptr [rbp+18h]
000000000044B01D mov qword ptr [rbp+20h],rax
000000000044B021 jmp somefunction+29h (44B02Fh)
000000000044B023 mov rax,qword ptr [rbp+20h]
000000000044B027 sub rax,8
000000000044B02B mov qword ptr [rbp+20h],rax
000000000044B02F cmp rax,qword ptr [rbp+10h]
000000000044B033 jb somefunction+4Ch (44B052h)
1081: nop
000000000044B035 nop
1082: .break .if (!count)
000000000044B036 cmp qword ptr [rbp+20h],0
000000000044B03B je somefunction+4Ch (44B052h)
1083: nop
000000000044B03D nop
1084: .continue .if (!src)
000000000044B03E cmp qword ptr [rbp+18h],0
000000000044B043 je somefunction+4Ah (44B050h)
1085: nop
000000000044B045 nop
1086: .continue
000000000044B046 jmp somefunction+4Ah (44B050h)
1087: nop
000000000044B048 nop
1088: .break
000000000044B049 jmp somefunction+4Ch (44B052h)
1089: mov eax,1
000000000044B04B mov eax,1
1090: .ENDFOR
000000000044B050 jmp somefunction+1Dh (44B023h)
1091:
1092: .for rsi=src:0:0:[rsi].AXPOINT.next
000000000044B052 mov rsi,qword ptr [rbp+18h]
or like this:
1080: .FOR rbx=src:dest:4:--rbx
000000000044B019 mov rbx,qword ptr [rbp+18h]
000000000044B01D jmp somefunction+1Dh (44B023h)
000000000044B01F sub rbx,4
000000000044B023 cmp rbx,qword ptr [rbp+10h]
000000000044B027 jb somefunction+40h (44B046h)
1081: nop
000000000044B029 nop
1082: .break .if (!count)
000000000044B02A cmp qword ptr [rbp+20h],0
000000000044B02F je somefunction+40h (44B046h)
1083: nop
000000000044B031 nop
1084: .continue .if (!src)
000000000044B032 cmp qword ptr [rbp+18h],0
000000000044B037 je somefunction+3Eh (44B044h)
1085: nop
000000000044B039 nop
1086: .continue
000000000044B03A jmp somefunction+3Eh (44B044h)
1087: nop
000000000044B03C nop
1088: .break
000000000044B03D jmp somefunction+40h (44B046h)
1089: mov eax,1
000000000044B03F mov eax,1
1090: .ENDFOR
000000000044B044 jmp somefunction+19h (44B01Fh)
1091:
1092: .for rsi=src:0:0:[rsi].AXPOINT.next
000000000044B046 mov rsi,qword ptr [rbp+18h]