issue when enabling -O2

classic Classic list List threaded Threaded
12 messages Options
Reply | Threaded
Open this post in threaded view
|

issue when enabling -O2

Matias Vara
Hello everyone, 

I am getting an exception when I enable the -O2 optimization. More precisely, the line that starts with write_portd... is corrupting the data section. This is the Pascal code: 

// Reads one 32-bit value through the PCI configuration ports.
// The address word is assembled from bus, device, func and regnum, written to
// PCI_CONF_PORT_INDEX, and the answer is then read back from
// PCI_CONF_PORT_DATA into the same local and returned.
function PciReadDword(const bus, device, func, regnum: UInt32): UInt32;
var
  Send: DWORD;
begin
  // Bit 31 is always set (presumably the configuration-enable bit -- confirm
  // against the PCI specification); bus starts at bit 16, device at bit 11,
  // func at bit 8 and regnum at bit 2. None of the fields is masked here.
  Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
  // Both port helpers take a pointer to the dword, not the value itself.
  write_portd(@Send, PCI_CONF_PORT_INDEX);
  read_portd(@Send, PCI_CONF_PORT_DATA);
  Result := Send;
end;  

which generates (without -O2):

# Unoptimized code generated for PciReadDword(bus, device, func, regnum).
# Every parameter and local lives in a stack slot relative to %rbp: the four
# arguments arrive in %ecx, %edx, %r8d and %r9d and are spilled right after
# the prologue, so no value is kept in a register across the two calls.
.section .text.n_arch_$$_pcireaddword$longword$longword$longword$longword$$longword,"x"
.balign 16,0x90
.globl ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD:
.Lc207:
.seh_proc ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
.Ll464:
# [992] begin
pushq %rbp
.seh_pushreg %rbp
.Lc209:
.Lc210:
movq %rsp,%rbp
.Lc211:
leaq -80(%rsp),%rsp
.seh_stackalloc 80
.seh_endprologue
# Var bus located at rbp-8, size=OS_32
# Var device located at rbp-16, size=OS_32
# Var func located at rbp-24, size=OS_32
# Var regnum located at rbp-32, size=OS_32
# Var $result located at rbp-40, size=OS_32
# Var Send located at rbp-48, size=OS_32
# Spill the register arguments into their stack slots.
movl %ecx,-8(%rbp)
movl %edx,-16(%rbp)
movl %r8d,-24(%rbp)
movl %r9d,-32(%rbp)
.Ll465:
# [993] Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
# The OR chain alternates between %eax and %edx; 2147483648 is $80000000.
movl -8(%rbp),%eax
shll $16,%eax
orl $2147483648,%eax
movl -16(%rbp),%edx
shll $11,%edx
orl %eax,%edx
movl -24(%rbp),%eax
shll $8,%eax
orl %edx,%eax
movl -32(%rbp),%edx
shll $2,%edx
orl %eax,%edx
movl %edx,-48(%rbp)
.Ll466:
# [995] write_portd(@Send, PCI_CONF_PORT_INDEX);
# %rcx = address of Send, %edx = port 3320 (0xCF8).
leaq -48(%rbp),%rcx
movl $3320,%edx
call ARCH_$$_WRITE_PORTD$POINTER$WORD
.Ll467:
# [996] read_portd(@Send, PCI_CONF_PORT_DATA);
# %rcx = address of Send, %edx = port 3324 (0xCFC).
leaq -48(%rbp),%rcx
movl $3324,%edx
call ARCH_$$_READ_PORTD$POINTER$WORD
.Ll468:
# [997] Result := Send;
movl -48(%rbp),%eax
movl %eax,-40(%rbp)
.Ll469:
# [998] end;
# The function result is loaded into %eax before the frame is torn down.
movl -40(%rbp),%eax
nop
leaq (%rbp),%rsp
popq %rbp
ret
.seh_endproc
.Lc208:
.Lt28:
.Ll470:

and with -O2:

# NOTE: this listing is PciWriteWord (five WORD parameters, no result), not
# the -O2 version of PciReadDword shown above -- which is why nothing is
# loaded into %eax before the ret.
# With -O2 the parameters are kept in registers instead of stack slots; only
# Send (whose address is passed to write_portd) and the saved %rbx are in
# memory.
.section .text.n_arch_$$_pciwriteword$word$word$word$word$word,"x"
.balign 16,0x90
.globl ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD:
.Lc148:
# Temps allocated between rbp-16 and rbp-8
.seh_proc ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
.Ll471:
# [1014] begin
pushq %rbp
.seh_pushreg %rbp
.Lc150:
.Lc151:
movq %rsp,%rbp
.Lc152:
leaq -48(%rsp),%rsp
.seh_stackalloc 48
# Var bus located in register ax
# Var device located in register dx
# Var func located in register r8w
# Var regnum located in register r9w
# Var value located in register cx
# %rbx is saved here and restored at [1018] because it is used below to
# carry `value` across the call to write_portd.
movq %rbx,-16(%rbp)
.seh_savereg %rbx, 32
.seh_endprologue
# Var Send located at rbp-8, size=OS_32
# bus arrives in %cx and is moved to %ax; the fifth argument (value) is
# fetched from the caller's stack at 48(%rbp) into %bx.
movw %cx,%ax
movw 48(%rbp),%bx
# PeepHole Optimization,var11
.Ll472:
# [1015] Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum and $fc);
# Each `andl $65535` zero-extends a 16-bit parameter before it is shifted.
andl $65535,%eax
shll $16,%eax
orl $2147483648,%eax
# PeepHole Optimization,var11
andl $65535,%edx
shll $11,%edx
orl %eax,%edx
# PeepHole Optimization,var11
andl $65535,%r8d
shll $8,%r8d
orl %edx,%r8d
# PeepHole Optimization,var1
# PeepHole Optimization,var11
andl $252,%r9d
orl %r8d,%r9d
movl %r9d,-8(%rbp)
.Ll473:
# [1016] write_portd(@Send, PCI_CONF_PORT_INDEX);
# %rcx = address of Send, %edx = port 3320 (0xCF8).
leaq -8(%rbp),%rcx
movl $3320,%edx
call ARCH_$$_WRITE_PORTD$POINTER$WORD
.Ll474:
# [1017] write_portw(value, PCI_CONF_PORT_DATA);
# `value` is recovered from %bx, so it must have survived the call above.
movw %bx,%cx
# Var value located in register cx
# PeepHole Optimization,var11
andl $65535,%ecx
movl $3324,%edx
call ARCH_$$_WRITE_PORTW$WORD$WORD
.Ll475:
# [1018] end;
movq -16(%rbp),%rbx
leaq (%rbp),%rsp
popq %rbp
ret
.seh_endproc

The first thing I noticed was that the optimized version does not seem to generate the correct code on exit, since it should return "Send". Am I right? The assembler code of write_portd remains the same. Am I missing something? 

Regards, Matias. 


_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
I think the problem was the way write_portd() is implemented:

// Writes the 32-bit value stored at Data^ to the I/O port Port.
//
// Only volatile registers (RAX and DX) are modified. The earlier OUTSD-based
// version loaded the pointer into RSI, which is callee-saved under the
// Microsoft x64 calling convention, and OUTSD additionally advances RSI by 4.
// Callers compiled with -O2 (register variables) could therefore find one of
// their values trashed after the call.
procedure write_portd(const Data: Pointer; const Port: Word); {$IFDEF ASMINLINE} inline; {$ENDIF}
asm // Win64: RCX = data, RDX = port
  // On Linux the port is not passed in DX, so copy it there first.
  {$IFDEF LINUX} mov dx, port {$ENDIF}
  mov rax, data   // RAX = pointer to the dword to send (RAX is volatile)
  mov eax, [rax]  // EAX = the value itself
  out dx, eax     // DX = port
end;

If I replace with something that does not use the outsd instruction, it works fine. 

Matias

2018-01-10 15:55 GMT+01:00 Matias Vara <[hidden email]>:
Hello everyone, 

I am getting an exception when I enable the -O2 optimization. More precisely, the line that starts with write_portd... is corrupting the data section. This is the Pascal code: 

// Reads one 32-bit value through the PCI configuration ports.
// The address word is assembled from bus, device, func and regnum, written to
// PCI_CONF_PORT_INDEX, and the answer is then read back from
// PCI_CONF_PORT_DATA into the same local and returned.
function PciReadDword(const bus, device, func, regnum: UInt32): UInt32;
var
  Send: DWORD;
begin
  // Bit 31 is always set (presumably the configuration-enable bit -- confirm
  // against the PCI specification); bus starts at bit 16, device at bit 11,
  // func at bit 8 and regnum at bit 2. None of the fields is masked here.
  Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
  // Both port helpers take a pointer to the dword, not the value itself.
  write_portd(@Send, PCI_CONF_PORT_INDEX);
  read_portd(@Send, PCI_CONF_PORT_DATA);
  Result := Send;
end;  

which generates (without -O2):

# Unoptimized code generated for PciReadDword(bus, device, func, regnum).
# Every parameter and local lives in a stack slot relative to %rbp: the four
# arguments arrive in %ecx, %edx, %r8d and %r9d and are spilled right after
# the prologue, so no value is kept in a register across the two calls.
.section .text.n_arch_$$_pcireaddword$longword$longword$longword$longword$$longword,"x"
.balign 16,0x90
.globl ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD:
.Lc207:
.seh_proc ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
.Ll464:
# [992] begin
pushq %rbp
.seh_pushreg %rbp
.Lc209:
.Lc210:
movq %rsp,%rbp
.Lc211:
leaq -80(%rsp),%rsp
.seh_stackalloc 80
.seh_endprologue
# Var bus located at rbp-8, size=OS_32
# Var device located at rbp-16, size=OS_32
# Var func located at rbp-24, size=OS_32
# Var regnum located at rbp-32, size=OS_32
# Var $result located at rbp-40, size=OS_32
# Var Send located at rbp-48, size=OS_32
# Spill the register arguments into their stack slots.
movl %ecx,-8(%rbp)
movl %edx,-16(%rbp)
movl %r8d,-24(%rbp)
movl %r9d,-32(%rbp)
.Ll465:
# [993] Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
# The OR chain alternates between %eax and %edx; 2147483648 is $80000000.
movl -8(%rbp),%eax
shll $16,%eax
orl $2147483648,%eax
movl -16(%rbp),%edx
shll $11,%edx
orl %eax,%edx
movl -24(%rbp),%eax
shll $8,%eax
orl %edx,%eax
movl -32(%rbp),%edx
shll $2,%edx
orl %eax,%edx
movl %edx,-48(%rbp)
.Ll466:
# [995] write_portd(@Send, PCI_CONF_PORT_INDEX);
# %rcx = address of Send, %edx = port 3320 (0xCF8).
leaq -48(%rbp),%rcx
movl $3320,%edx
call ARCH_$$_WRITE_PORTD$POINTER$WORD
.Ll467:
# [996] read_portd(@Send, PCI_CONF_PORT_DATA);
# %rcx = address of Send, %edx = port 3324 (0xCFC).
leaq -48(%rbp),%rcx
movl $3324,%edx
call ARCH_$$_READ_PORTD$POINTER$WORD
.Ll468:
# [997] Result := Send;
movl -48(%rbp),%eax
movl %eax,-40(%rbp)
.Ll469:
# [998] end;
# The function result is loaded into %eax before the frame is torn down.
movl -40(%rbp),%eax
nop
leaq (%rbp),%rsp
popq %rbp
ret
.seh_endproc
.Lc208:
.Lt28:
.Ll470:

and with -O2:

# NOTE: this listing is PciWriteWord (five WORD parameters, no result), not
# the -O2 version of PciReadDword shown above -- which is why nothing is
# loaded into %eax before the ret.
# With -O2 the parameters are kept in registers instead of stack slots; only
# Send (whose address is passed to write_portd) and the saved %rbx are in
# memory.
.section .text.n_arch_$$_pciwriteword$word$word$word$word$word,"x"
.balign 16,0x90
.globl ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD:
.Lc148:
# Temps allocated between rbp-16 and rbp-8
.seh_proc ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
.Ll471:
# [1014] begin
pushq %rbp
.seh_pushreg %rbp
.Lc150:
.Lc151:
movq %rsp,%rbp
.Lc152:
leaq -48(%rsp),%rsp
.seh_stackalloc 48
# Var bus located in register ax
# Var device located in register dx
# Var func located in register r8w
# Var regnum located in register r9w
# Var value located in register cx
# %rbx is saved here and restored at [1018] because it is used below to
# carry `value` across the call to write_portd.
movq %rbx,-16(%rbp)
.seh_savereg %rbx, 32
.seh_endprologue
# Var Send located at rbp-8, size=OS_32
# bus arrives in %cx and is moved to %ax; the fifth argument (value) is
# fetched from the caller's stack at 48(%rbp) into %bx.
movw %cx,%ax
movw 48(%rbp),%bx
# PeepHole Optimization,var11
.Ll472:
# [1015] Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum and $fc);
# Each `andl $65535` zero-extends a 16-bit parameter before it is shifted.
andl $65535,%eax
shll $16,%eax
orl $2147483648,%eax
# PeepHole Optimization,var11
andl $65535,%edx
shll $11,%edx
orl %eax,%edx
# PeepHole Optimization,var11
andl $65535,%r8d
shll $8,%r8d
orl %edx,%r8d
# PeepHole Optimization,var1
# PeepHole Optimization,var11
andl $252,%r9d
orl %r8d,%r9d
movl %r9d,-8(%rbp)
.Ll473:
# [1016] write_portd(@Send, PCI_CONF_PORT_INDEX);
# %rcx = address of Send, %edx = port 3320 (0xCF8).
leaq -8(%rbp),%rcx
movl $3320,%edx
call ARCH_$$_WRITE_PORTD$POINTER$WORD
.Ll474:
# [1017] write_portw(value, PCI_CONF_PORT_DATA);
# `value` is recovered from %bx, so it must have survived the call above.
movw %bx,%cx
# Var value located in register cx
# PeepHole Optimization,var11
andl $65535,%ecx
movl $3324,%edx
call ARCH_$$_WRITE_PORTW$WORD$WORD
.Ll475:
# [1018] end;
movq -16(%rbp),%rbx
leaq (%rbp),%rsp
popq %rbp
ret
.seh_endproc

The first thing I noticed was that the optimized version does not seem to generate the correct code on exit, since it should return "Send". Am I right? The assembler code of write_portd remains the same. Am I missing something? 

Regards, Matias. 



_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Karoly Balogh (Charlie/SGR)
In reply to this post by Matias Vara
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> I am getting an exception when I enable the -O2 optimization. More
> precisaily, the line that stars with write_portd.... is corrupting the
> data section. This is the pascal code: 
>
> function PciReadDword(const bus, device, func, regnum: UInt32): UInt32;
> var
>   Send: DWORD;
> begin
>   Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
>   write_portd(@Send, PCI_CONF_PORT_INDEX);
>   read_portd(@Send, PCI_CONF_PORT_DATA);
>   Result := Send;
> end;  
>
> which generates (without -02):
>
> .section .text.n_arch_$$_pcireaddword$longword$longword$longword$longword$$longword,"x"
> .balign 16,0x90
> .globl ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
> ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD:
>
> (***shipp***)
>
> and with -O2:
>
> .section .text.n_arch_$$_pciwriteword$word$word$word$word$word,"x"
> .balign 16,0x90
> .globl ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
> ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD:
>
> The first thing that I realize was the the optimized version is not
> generating the correct source when is exiting since it should return
> "Send", but am I right? The assembler code of write_portd remains the
> same, Am I missing something? 
The -O2 version of the function you sent is from a different one, it's
from a Write function, not a Read... So no wonder it doesn't return
anything... :)

BTW, -O2 uses register variables, while -O- doesn't. If your assembler
trashes one of them without preserving one, it can lead to crashes...
But there could be many other reasons.

Without seeing the actual assembler functions it's hard to tell.

Charlie
_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
Hi Karol, and thank you very much! I got confused with the function names; I am very sorry.
As I said in my previous email, I fixed it by rewriting the assembler function. However, I don't know why it worked. 
So, if I understand you correctly, when I use assembler in my procedures there is a risk that I trash a variable that the compiler is using. Does the compiler warn me about this? 

Matias  

2018-01-10 17:14 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> I am getting an exception when I enable the -O2 optimization. More
> precisaily, the line that stars with write_portd.... is corrupting the
> data section. This is the pascal code: 
>
> function PciReadDword(const bus, device, func, regnum: UInt32): UInt32;
> var
>   Send: DWORD;
> begin
>   Send := $80000000 or (bus shl 16) or (device shl 11) or (func shl 8) or (regnum shl 2);
>   write_portd(@Send, PCI_CONF_PORT_INDEX);
>   read_portd(@Send, PCI_CONF_PORT_DATA);
>   Result := Send;
> end;  
>
> which generates (without -02):
>
> .section .text.n_arch_$$_pcireaddword$longword$longword$longword$longword$$longword,"x"
> .balign 16,0x90
> .globl ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD
> ARCH_$$_PCIREADDWORD$LONGWORD$LONGWORD$LONGWORD$LONGWORD$$LONGWORD:
>
> (***shipp***)
>
> and with -O2:
>
> .section .text.n_arch_$$_pciwriteword$word$word$word$word$word,"x"
> .balign 16,0x90
> .globl ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD
> ARCH_$$_PCIWRITEWORD$WORD$WORD$WORD$WORD$WORD:
>
> The first thing that I realize was the the optimized version is not
> generating the correct source when is exiting since it should return
> "Send", but am I right? The assembler code of write_portd remains the
> same, Am I missing something? 

The -O2 version of the function you sent is from a different one, it's
from a Write function, not a Read... So no wonder it doesn't return
anything... :)

BTW, -O2 uses register variables, while -O- doesn't. If your assembler
trashes one of them without preserving one, it can lead to crashes...
But there could be many other reasons.

Without seeing the actual assembler functions it's hard to tell.

Charlie

_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal


_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Karoly Balogh (Charlie/SGR)
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> Hi Karol and thanks you very much! I got confused with the function
> names, I feel very sorry.As I said in my previous email, I fixed by
> rewriting the assembler function. However, I don't why it worked. 

By accident. Simply the register/stack/memory layout being different upon
entry, and it worked by pure luck.

> So I understand you correctly, if I use assembler in my procedures there
> could be a risk that I trash a variable that the compiler is using.

Yes. All operating systems and CPU architectures define a so called ABI or
calling convention, which functions must respect. This details which
registers are the parameters to be passed on, and which registers are free
to destroy in a function and which ones *MUST* be saved/preserved (usually
on the stack). There's no way around this. The compiler will expect that
your assembly subfunctions play by the rules.

See here, for example, for x86:
https://en.wikipedia.org/wiki/X86_calling_conventions

> Is  the compiler warning me about this? 

No. Assembly is quite a minefield in this regard. If you use assembler,
the compiler assumes you know what you're doing, and doesn't analyze the
assembler function. You have to respect the ABI of your CPU *AND* target
platform by hand, and save the nonvolatile registers.

(This is BTW, not Free Pascal specific. Delphi does the same, or more or
less any other language which supports inline assembly.)

Charlie

_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
Thank you very much Karol, I completely missed this point during the development of my kernel (or maybe I had it in mind some time ago but forgot it).
It was only when I decided to play with -O2 that all these issues arose. 

Matias

2018-01-10 17:34 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> Hi Karol and thanks you very much! I got confused with the function
> names, I feel very sorry.As I said in my previous email, I fixed by
> rewriting the assembler function. However, I don't why it worked. 

By accident. Simply the register/stack/memory layout being different upon
entry, and it worked by pure luck.

> So I understand you correctly, if I use assembler in my procedures there
> could be a risk that I trash a variable that the compiler is using.

Yes. All operating systems and CPU architectures define a so called ABI or
calling convention, which functions must respect. This details which
registers are the parameters to be passed on, and which registers are free
to destroy in a function and which ones *MUST* be saved/preserved (usually
on the stack). There's no way around this. The compiler will expect that
your assembly subfunctions play by the rules.

See here, for example, for x86:
https://en.wikipedia.org/wiki/X86_calling_conventions

> Is  the compiler warning me about this? 

No. Assembly is quite a minefield in this regard. If you use assembler,
the compiler assumes you know what you're doing, and doesn't analyze the
assembler function. You have to respect the ABI of your CPU *AND* target
platform by hand, and save the nonvolatile registers.

(This is BTW, not Free Pascal specific. Delphi does the same, or more or
less any other language which supports inline assembly.)

Charlie

_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal


_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
BTW, this only applies to inline assembler functions, right? In the case of normal procedures that contain an asm ... end; block, there is no problem. Am I right?

Matias

2018-01-10 17:51 GMT+01:00 Matias Vara <[hidden email]>:
Thank you very much Karol, I completely missed this point during the development of my kernel (or maybe I had it in mind some time ago but forgot it).
It was only when I decided to play with -O2 that all these issues arose. 

Matias

2018-01-10 17:34 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> Hi Karol and thanks you very much! I got confused with the function
> names, I feel very sorry.As I said in my previous email, I fixed by
> rewriting the assembler function. However, I don't why it worked. 

By accident. Simply the register/stack/memory layout being different upon
entry, and it worked by pure luck.

> So I understand you correctly, if I use assembler in my procedures there
> could be a risk that I trash a variable that the compiler is using.

Yes. All operating systems and CPU architectures define a so called ABI or
calling convention, which functions must respect. This details which
registers are the parameters to be passed on, and which registers are free
to destroy in a function and which ones *MUST* be saved/preserved (usually
on the stack). There's no way around this. The compiler will expect that
your assembly subfunctions play by the rules.

See here, for example, for x86:
https://en.wikipedia.org/wiki/X86_calling_conventions

> Is  the compiler warning me about this? 

No. Assembly is quite a minefield in this regard. If you use assembler,
the compiler assumes you know what you're doing, and doesn't analyze the
assembler function. You have to respect the ABI of your CPU *AND* target
platform by hand, and save the nonvolatile registers.

(This is BTW, not Free Pascal specific. Delphi does the same, or more or
less any other language which supports inline assembly.)

Charlie

_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal



_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Karoly Balogh (Charlie/SGR)
In reply to this post by Matias Vara
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> Thank you very much Karol, I completly missed this point during the
> development of my kernel (Or maybe I had in mind sometime ago but I
> forgot it)It was only when I decided to play with -02 that all these
> issues arrised. 

BTW, your code (as shown by the parameter passing in your assembler dumps)
seems to use the Microsoft x64 calling convention, which declares RSI
value as callee-saved, and then your code overwrites that without saving.

So the fact that your code overwrites it, could be the culprit. (And then
the OUTSD instruction also increases RSI by 4, so even if that MOV
wouldn't be there, you'd still need to save it.)

Also note that the 64bit x86-64 ABI is very different between Linux and
Windows, for example.

BTW, developing a kernel in Pascal is cool... ;)

Charlie
_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Karoly Balogh (Charlie/SGR)
In reply to this post by Matias Vara
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> BTW, this only applies to inline assembler functions rigth? In the case
> of normal procedures that contains a block asm end; there is no problem,
> Am I right?

No, it applies to *ALL* assembler code. Also inline blocks. The only
difference is, for blocks you can hint the compiler which registers you
use, so the compiler can also tailor code which improves the interaction
with your assembler code:

https://www.freepascal.org/docs-html/ref/refse87.html

This is not true for pure assembler functions tho', for performance
reasons among others.

Charlie
_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
In reply to this post by Karoly Balogh (Charlie/SGR)
Hello, 

2018-01-10 18:03 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> Thank you very much Karol, I completly missed this point during the
> development of my kernel (Or maybe I had in mind sometime ago but I
> forgot it)It was only when I decided to play with -02 that all these
> issues arrised. 

BTW, your code (as shown by the parameter passing in your assembler dumps)
seems to use the Microsoft x64 calling convention, which declares RSI
value as callee-saved, and then your code overwrites that without saving.

So the fact that your code overwrites it, could be the culprit. (And then
the OUTSD instruction also increases RSI by 4, so even if that MOV
wouldn't be there, you'd still need to save it.)

Also note that the 64bit x86-64 ABI is very different between Linux and
Windows, for example.


Thanks, I am going to review all the assembler code.
 
BTW, developing a kernel in Pascal is cool... ;)

It is indeed :)

Matias
 

Charlie

_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal


_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
In reply to this post by Karoly Balogh (Charlie/SGR)

2018-01-10 18:09 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> BTW, this only applies to inline assembler functions rigth? In the case
> of normal procedures that contains a block asm end; there is no problem,
> Am I right?

No, it applies to *ALL* assembler code. Also inline blocks. The only
difference is, for blocks you can hint the compiler which registers you
use, so the compiler can also tailor code which improves the interaction
with your assembler code:

https://www.freepascal.org/docs-html/ref/refse87.html

This is not true for pure assembler functions tho', for performance
reasons among others.

Charlie

This is very interesting. 

Thanks a lot. 

 
_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal


_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal
Reply | Threaded
Open this post in threaded view
|

Re: issue when enabling -O2

Matias Vara
After fixing the assembler code and other issues (e.g., interrupt handlers were not restoring all registers), I observed a speed-up of ~12% with -O2 when running a simple webserver example. That's very impressive! 

Thanks for your help, Matias.

2018-01-10 18:19 GMT+01:00 Matias Vara <[hidden email]>:

2018-01-10 18:09 GMT+01:00 Karoly Balogh (Charlie/SGR) <[hidden email]>:
Hi,

On Wed, 10 Jan 2018, Matias Vara wrote:

> BTW, this only applies to inline assembler functions rigth? In the case
> of normal procedures that contains a block asm end; there is no problem,
> Am I right?

No, it applies to *ALL* assembler code. Also inline blocks. The only
difference is, for blocks you can hint the compiler which registers you
use, so the compiler can also tailor code which improves the interaction
with your assembler code:

https://www.freepascal.org/docs-html/ref/refse87.html

This is not true for pure assembler functions tho', for performance
reasons among others.

Charlie

This is very interesting. 

Thanks a lot. 

 
_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal



_______________________________________________
fpc-pascal maillist  -  [hidden email]
http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal