ã°ã©ãã£ãã¯ã¬ã³ããªã³ã°ã®ããã°ã©ãã³ã°äžãç§ãã¡ã¯30ããªç§ã®GPUãã¬ãŒã ãå®çŸããããã«äœã¬ãã«ã®æé©åãå¿
èŠãªäžçã«äœãã§ããŸãã ãããè¡ãããã«ãããŸããŸãªææ³ã䜿çšããããã©ãŒãã³ã¹ãæ¹åããæ°ããã¬ã³ããªã³ã°ãã¹ïŒãžãªã¡ããªå±æ§ããã¯ã¹ãã£ãã£ãã·ã¥ããšã¯ã¹ããŒããªã©ïŒãGPRå§çž®ãã¬ã€ãã³ã·ãŒé衚瀺ãROPããŒãããéçºããŸãã...
CPUããã©ãŒãã³ã¹ãåäžãããåéã§ã¯ãããŸããŸãªããªãã¯ãäžåºŠã«äœ¿çšãããŸãããã
ALUèšç®ãé«éåããããã«çŸä»£ã®ãããªã«ãŒãã«äœ¿çšãããŠããããšã¯æ³šç®ã«å€ããŸãïŒ
AMD GCNã®äœã¬ãã«æé©å ã
Quakeéå¹³æ¹æ ¹ ïŒã
Quakeã®ã¯ã€ãã¯éå¹³æ¹æ ¹ãããæè¿ãç¹ã«64ããããžã®ç§»è¡ãèæ
®ããŠãæé©åãããŠããªãã³ãŒãã®æ°ãå¢å ããŠããããšã«æ°ä»ããŸããã以åã«èç©ããããã¹ãŠã®ç¥èãæ¥çã§æ¥éã«å€±ãããŠãããã®ããã§ãã ã¯ããçŸä»£ã®ããã»ããµã®é«éå¹³æ¹æ ¹éé¢æ°ã®ãããªå€ãããªãã¯ã¯éå¹æã§ãã ããããããã°ã©ããŒã¯äœã¬ãã«ã®æé©åãå¿ããŠã¯ãªãããã³ã³ãã€ã©ãŒããã¹ãŠã®åé¡ã解決ããããšãæåŸ
ããŠãã ããã
決ããªãã§ããã®èšäºã¯ããŒããŠã§ã¢ã®å®å
šãªããŒãã³ã¢ã¬ã€ãã§ã¯ãããŸããã ããã¯åãªã玹ä»ã§ããããªãã€ã³ããŒã§ãããCPUçšã®å¹æçãªã³ãŒããèšè¿°ããããã®äžé£ã®åºæ¬ååã§ãã ç§
ãè¿œå ã§ããããã»ããµã«ã€ããŠè©±ããŠããå Žå ã§ãã ã
äœã¬ãã«ã®æèãä»æ¥ã§ãæçšã§ããããšã瀺ãããã
ããã®èšäºã§ã¯ããã£ãã·ã³ã°ããã¯ãã«ã»ããã°ã©ãã³ã°ãã¢ã»ã³ãã©ãŒã»ã³ãŒãã®èªã¿åããšç解ãããã³ã³ã³ãã€ã©ãŒã«ãšã£ãŠäŸ¿å©ãªã³ãŒãã®äœæãæ€èšããŸãã
ãªãããããïŒ
休憩を忘れないでください
1980幎代ã«ã¯ãã¡ã¢ãªãã¹ã®åšæ³¢æ°ã¯CPUã®åšæ³¢æ°ãšçãããé
延ã¯ã»ãŒãŒãã§ããã ããããããã»ããµã®ããã©ãŒãã³ã¹ã¯ã ãŒã¢ã®æ³åã«åŸã£ãŠå¯Ÿæ°çã«å¢å ããRAMãããã®ããã©ãŒãã³ã¹ã¯äžåè¡¡ã«å¢å ãããããã¡ã¢ãªã¯ããã«ããã«ããã¯ã«ãªããŸããã ãããŠããã€ã³ãã¯ãããé«éãªã¡ã¢ãªãäœæã§ããªããšããããšã§ã¯ãããŸãããããã¯å¯èœã§ããã
çµæžçã«äžå©ã§ããCPUãšã¡ã¢ãªã®é床ã®å€æŽã¡ã¢ãªããã©ãŒãã³ã¹ã®åœ±é¿ã軜æžããããã«ãCPUéçºè
ã¯ããã»ããµãšã¡ã€ã³ã¡ã¢ãªã®éã«ãã®éåžžã«é«äŸ¡ãªã¡ã¢ãªãå°éè¿œå ããŸãããããã
ããã»ããµãã£ãã·ã¥ã®è¡šç€ºæ¹æ³ã§ãã
ã¢ã€ãã¢ã¯æ¬¡ã®ãšããã§ããçæéã«åãã³ãŒããŸãã¯ããŒã¿ãåã³å¿
èŠã«ãªãå¯èœæ§ãé«ãã§ãã
- 空éçå±ææ§ïŒã³ãŒãå
ã§ã«ãŒããããããåãã³ãŒããç¹°ãè¿ãå®è¡ãããŸãã
- äžæçãªããŒã«ãªãã£ïŒçæé䜿çšãããã¡ã¢ãªã®ã»ã¯ã·ã§ã³ãäºãã«é£æ¥ããŠããªãå Žåã§ããåãããŒã¿ãããã«åã³äœ¿çšãããå¯èœæ§ãé«ãã§ãã
CPUãã£ãã·ã¥ã¯çç£æ§ãåäžãããããã®è€éãªææ³ã§ãããããã°ã©ããŒã®å©ãããªããã°æ£ããæ©èœããŸããã æ®å¿µãªãããå€ãã®éçºè
ã¯ãã¡ã¢ãªäœ¿çšéãšCPUãã£ãã·ã¥ã®æ§é ã®ã³ã¹ããèªèããŠããŸããã
データ指向アーキテクチャ
ã²ãŒã ãšã³ãžã³ã«èå³ããããŸãã å¢ãç¶ããããŒã¿ãåŠçããå€æããŠãªã¢ã«ã¿ã€ã ã§è¡šç€ºããŸãã ãããšãå¹ççã«åé¡ã解決ããå¿
èŠæ§ãèãããšãããã°ã©ããŒã¯èªåãåŠçããããŒã¿ãç解ããã³ãŒããæ©èœããæ©åšãç¥ãå¿
èŠããããŸãã
ãããã£ãŠã圌ã¯ããŒã¿æåèšèšïŒDoDïŒãå®è£
ããå¿
èŠæ§ãèªèããŠããå¿
èŠããããŸãããããšããã³ã³ãã€ã©ãç§ã®ããã«ãããè¡ãã§ããããïŒ
ã·ã³ãã«ãªè¿œå ã å·ŠåŽã¯C ++ãå³åŽã¯çµæã®ã¢ã»ã³ãã©ã³ãŒãã§ããAMD Jaguarããã»ããµã®äžèšã®äŸãèŠãŠã¿ãŸãããïŒã²ãŒã ã³ã³ãœãŒã«ã§äœ¿çšããããã®ãšåæ§ïŒïŒæçšãªãªã³ã¯ïŒ
AMDã®Jaguar MicroarchitectureïŒã¡ã¢ãªéå±€ ã
AMD Athlon 5350 APUããã³AM1ãã©ãããã©ãŒã ã¬ãã¥ãŒ-ããã©ãŒãã³ã¹-ã·ã¹ãã ã¡ã¢ãª ïŒïŒ
- ããŠã³ããŒãæäœïŒ ãã£ãã·ã¥ãªãã§çŽ200ãµã€ã¯ã«ïŒ
- å®éã®äœæ¥ïŒ Inc eax ïŒ1ãµã€ã¯ã«ïŒ
- ã¹ãã¬ãŒãžæäœïŒã3ã«ãŒããåããã£ãã·ã¥ã©ã€ã³ïŒ
ãã®ãããªåçŽãªäŸã§ããããã»ããµã®æéã®ã»ãšãã©ã¯ããŒã¿ã®åŸ
æ©ã«è²»ããããããè€éãªããã°ã©ã ã§ã¯ãããã°ã©ããŒãåºç€ãšãªãã¢ãŒããã¯ãã£ã«æ³šæãæããŸã§ç¶æ³ã¯æ¹åããŸããã
èŠããã«ãã³ã³ãã€ã©ã¯ïŒ
- 圌ãã¯å
šäœåãèŠãŠããªããããããŒã¿ãã©ã®ããã«ç·šæãããã©ã®ããã«ã¢ã¯ã»ã¹ãããããäºæž¬ããããšã¯éåžžã«å°é£ã§ãã
- ç®è¡æŒç®ã¯é©åã«æé©åã§ããŸããããããã®æŒç®ã¯æ°·å±±ã®äžè§ã«ãããªãããšããããŸãã
ã³ã³ãã€ã©ã«ã¯ãã¡ã¢ãªã¢ã¯ã»ã¹ã®æé©åã«é¢ããŠãæäœã®äœå°ãããªããããŸãã ã³ã³ããã¹ãã¯ããã°ã©ãã®ã¿ãç¥ã£ãŠãããã©ã®ã³ãŒããæžãããããç¥ã£ãŠããã®ã¯åœŒã ãã§ãã ãããã£ãŠã
æ
å ±ãããŒã®æµããç解ããæåã«ãææ°ã®CPUããå¯èœãªéããã¹ãŠãå§çž®ããããã«
ãããŒã¿åŠçã«é²ãå¿
èŠããããŸãã
残忍な真実：OOP対DoD
ã¡ã¢ãªã¢ã¯ã»ã¹ã¹ããŒã ã®ããã©ãŒãã³ã¹ãžã®åœ±é¿ïŒMike Acton GDC15ïŒä»æ¥ã®ãªããžã§ã¯ãæåããã°ã©ãã³ã°ïŒOOPïŒã¯äž»èŠãªãã©ãã€ã ã§ãããå°æ¥ã®ããã°ã©ããäž»ã«ç 究ããŠãããã®ã§ãã å®äžçã®ãªããžã§ã¯ããšãããã®é¢ä¿ã®èŠ³ç¹ããèããããšãã§ããŸãã
ã¯ã©ã¹ã§ã¯ãéåžžãã³ãŒããšããŒã¿ãã«ãã»ã«åãããããããªããžã§ã¯ãã«ã¯ãã¹ãŠã®æ
å ±ãå«ãŸããŸãã æ§é ã®é
åïŒæ§é ã®é
åïŒããã³*æ§é /ãªããžã§ã¯ããžã®*ãã€ã³ã¿ãŒã®é
åã®äœ¿çšã匷å¶ãããšãOOPã¯ãã£ãã·ã¥ã䜿çšããã¡ã¢ãªãžã®ã¢ã¯ã»ã¹ã®å éã«åºã¥ã
空éçå±ææ§ã®ååã«éåããŸãã ããã»ããµã®ããã©ãŒãã³ã¹ãšã¡ã¢ãªã®ã®ã£ãããèŠããŠããŸããïŒ
éå°ãªã«ãã»ã«åã¯ãææ°ã®ããŒããŠã§ã¢ã§äœæ¥ããå Žåã«æ害ã§ãããœãããŠã§ã¢ãéçºããéã«ã¯ãã³ãŒãèªäœããããŒã¿å€æã®ç解ã«çŠç¹ã移ãå¿
èŠãããããŸããçŸåšã®ããã°ã©ãã³ã°æåãšOOPãµããŒã¿ãŒã«ãã£ãŠèª²ãããç¶æ³ã«å¯Ÿå¿ããå¿
èŠãããããšãäŒããããšæããŸãã
çµè«ãšããŠããã€ã¯ã¢ã¯ãã³ãèªã£ã3ã€ã®å€§ããªåãåŒçšããããšæããŸãïŒ
CppCon 2014ïŒãã€ã¯ã¢ã¯ãã³ããããŒã¿æåèšèšãšC ++ã ïŒ
ãœãããŠã§ã¢ã¯ãã©ãããã©ãŒã ã§ã
- 䜿çšããããŒããŠã§ã¢ãç解ããå¿
èŠããããŸã
ã³ãŒãã¢ãŒããã¯ãã£ã¯äžçã§ã¢ãã«åãããŠããŸãã
- ã³ãŒãã¢ãŒããã¯ãã£ã¯ããŒã¿ã¢ãã«ãšäžèŽããå¿
èŠããããŸã
ã³ãŒãã¯ããŒã¿ãããéèŠã§ã
- ã¡ã¢ãªã¯ããã«ããã¯ã§ãããããŒã¿ãæãéèŠãªãã®ã§ãã
éãåŠã¶
マイクロプロセッサーキャッシュ
プロセッサは物理的にメインメモリに直接接続されていません。 最新のプロセッサーでのRAMの操作（ロードとストレージ）はすべて、キャッシュを介して実行されます。
ããã»ããµãåŒã³åºãïŒããŒãïŒã³ãã³ãã§ããžãŒã§ããå Žåã
ã¡ã¢ãªã³ã³ãããŒã©ãŒã¯ãŸããèªã¿åãå¿
èŠãããã¡ã¢ãªã¢ãã¬ã¹ã«å¯Ÿå¿ãã
ã¿ã°ãæã€ãšã³ããªããã£ãã·ã¥ã§æ€çŽ¢ããŸãã ãã®ãããªã¬ã³ãŒããæ€åºã
ããå Žåãã€ãŸããã£ãã·ã¥ã«èšé²ã
ããå Žå ãããŒã¿ã¯ãã£ãã·ã¥ããçŽæ¥ããŒãã§ããŸãã ããã§ãªãå Žå-
ãã£ãã·ã¥ãã¹ -ã³ã³ãããŒã©ãŒã¯ãããäœããã£ãã·ã¥ã¬ãã«ïŒããšãã°ãæåã«L1Dã次ã«L2ã次ã«L3ïŒãæåŸã«RAMããããŒã¿ãæœåºããããšããŸãã ãã®åŸãããŒã¿ã¯L1ãL2ãããã³L3ïŒ
ãã£ãã·ã¥ãå«ã ïŒã«ä¿åãããŸãã
ã»ãããããããã¯ã¹ã®ã¡ã¢ãªã¬ã€ãã³ã·-ãžã§ã€ãœã³ã°ã¬ãŽãªãŒãã®ç°¡ç¥å³ã§ã¯ãããã»ããµïŒPS4ããã³XB1ã§äœ¿çšãããAMD JaguarïŒã«ã¯ãL1ãšL2ã®2ã€ã®ãã£ãã·ã¥ã¬ãã«ããããŸãã ã芧ã®ãšãããããŒã¿ããã£ãã·ã¥ãããã ãã§ãªããL1ã¯ã³ãŒãåœä»€ãã£ãã·ã¥ïŒL1IïŒãšããŒã¿ãã£ãã·ã¥ïŒL1DïŒã«åå²ãããŸãã ã³ãŒããšããŒã¿ã«å¿
èŠãªã¡ã¢ãªé åã¯äºãã«ç¬ç«ããŠããŸãã äžè¬çã«ãL1Iã¯L1Dãããã¯ããã«å°ãªãåé¡ãäœæããŸãã
ã¬ã€ãã³ã·ã«é¢ããŠã¯ãL1ã¯L2ãããæ¡éãã«
éããã¡ã€ã³ã¡ã¢ãª
ããã10åé«éã§ã ã æ°åã§ã¯æ²ããã«èŠããŸããããã¹ãŠã®
ãã£ãã·ã¥ãã¹ã«å¯ŸããŠå
šé¡ãæ¯æãå¿
èŠã¯ãããŸããã ã¬ã€ãã³ã·ãŒãã¹ã±ãžã¥ãŒãªã³ã°ãªã©ãé衚瀺ã«ããããšã§ã³ã¹ããåæžã§ããŸãããããã¯ãã§ã«æçš¿ã®ç¯å²å€ã§ãã
ã¡ã¢ãªã¢ã¯ã»ã¹é
延-Andreas Fredrikssonåãã£ãã·ã¥ãšã³ããªïŒ
ãã£ãã·ã¥ã©ã€ã³ ïŒã«ã¯ãããã€ãã®é£ç¶ããåèªïŒAMD JaguarãŸãã¯Core i7ã®å Žåã¯64ãã€ãïŒãå«ãŸããŠããŸãã CPUãå€ãååŸãŸãã¯ä¿åããåœä»€ãå®è¡ãããšããã£ãã·ã¥ã©ã€ã³å
šäœãL1Dã«æž¡ãããŸãã ä¿åããå Žåãæžã蟌ã¿å
ã®ãã£ãã·ã¥ã©ã€ã³ã¯ãRAMã«æžãæ»ããããŸã§
ããŒãã£ãšããŠããŒã¯ãããŸãã
ã¬ãžã¹ã¿ããã¡ã¢ãªãžã®æžã蟌ã¿ãã£ãã·ã¥ã«æ°ããããŒã¿ãããŒãã§ããããã«ããã«ã¯ãã»ãšãã©ã®å ŽåããŸã
ãã£ãã·ã¥ã©ã€ã³ãåé€ããŠã¹ããŒã¹ã解æŸããå¿
èŠããããŸãã
- æä»çãã£ãã·ã¥ïŒååŸããããšããã£ãã·ã¥ã©ã€ã³ã¯L1DããL2ã«ç§»åããŸãã ããã¯ãL2ã«ã¹ããŒã¹ãå²ãåœãŠãå¿
èŠãããããšãæå³ããã¡ã€ã³ã¡ã¢ãªãžã®ããŒã¿ã®å転éã«ã€ãªããå¯èœæ§ããããŸãã ååŸããè¡ãL1DããL2ã«éã¶ãšããã£ãã·ã¥ãã¹ã®é
延ã«åœ±é¿ããŸãã
- å
æ¬çãã£ãã·ã¥ïŒ L1Dã®åãã£ãã·ã¥ã©ã€ã³ã¯L2ã§ãè¡šãããŸãã L1Dããã®æœåºã¯ã¯ããã«é«éã§ããããã以äžã®ã¢ã¯ã·ã§ã³ã¯äžèŠã§ãã
æ°ããIntelããã³AMDããã»ããµã¯ã
å
æ¬çãªãã£ãã·ã¥ã䜿çšããŸãã ããã¯æåã¯ééãã®ããã«æãããããããŸãããã次ã®2ã€ã®å©ç¹ããããŸãã
- ååŸæã«ãã£ãã·ã¥ã©ã€ã³ãå¥ã®ã¬ãã«ã«ç§»åããå¿
èŠããªãããããã£ãã·ã¥ãã¹ã®é
延ãæžå°ããŸãã
- ããã«ãŒãã«ãå¥ã®ã«ãŒãã«ã䜿çšããŠããããŒã¿ãå¿
èŠãšããå Žåãå¥ã®ã«ãŒãã«ã®åäœãäžæããããšãªãããã£ãã·ã¥ã®äžäœã¬ãã«ããææ°ããŒãžã§ã³ãæœåºã§ããŸãã ãããã£ãŠããã«ãã³ã¢ã¢ãŒããã¯ãã£ã®éçºã§ã¯ãå
æ¬çãã£ãã·ã¥ãéåžžã«äžè¬çã«ãªããŸããã
ãã£ãã·ã¥ã©ã€ã³ã®è¡çªïŒè€æ°ã®ã³ã¢ããã£ãã·ã¥ã©ã€ã³ãå¹ççã«èªã¿åãããšãã§ããŸãããæžã蟌ã¿æäœã«ããããã©ãŒãã³ã¹ãäœäžããå¯èœæ§ããããŸãã ãåœå
±æãã®æŠå¿µã¯ãç°ãªãã«ãŒãã«ãåããã£ãã·ã¥ã©ã€ã³ã«ããç¬ç«ããããŒã¿ãå€æŽã§ããããšãæå³ããŸãã ãã£ãã·ã¥ã³ããŒã¬ã³ã¹ãããã³ã«ã«ãããšãã«ãŒãã«ããã£ãã·ã¥ã©ã€ã³ã«æžã蟌ããšãåãã¡ã¢ãªãåç
§ããå¥ã®ã³ã¢ã®ã©ã€ã³ãç¡å¹ã«ãªããŸãïŒ
ãã£ãã·ã¥ã¹ãªãã ããã£ãã·ã¥ãã©ãã·ã³ã°ïŒã ãã®çµæãåæžã蟌ã¿æäœäžã«ã¡ã¢ãªããã¯ãçºçããŸãã 誀ã£ãåé¢ã¯ãç°ãªãã³ã¢ãç°ãªãè¡ã§åäœãããããšã§åé¿ã§ããŸãïŒäœåãªã¹ããŒã¹ã䜿çšãã-äœåãªããã£ã³ã°ãæ§é ã64ãã€ããã€æãããªã©ïŒã
åã¹ã¬ããã®ç°ãªããã£ãã·ã¥ã©ã€ã³ã«ããŒã¿ãæžã蟌ãããšã«ããã誀ã£ãåé¢ãåé¿ããŸãã芧ã®ãšãããããŒããŠã§ã¢ã¢ãŒããã¯ãã£ãç解ããããšã¯ãèŠéããããå¯èœæ§ã®ããåé¡ãæ€åºããŠä¿®æ£ããããã®éµã§ãã
Coreinfoã¯ã³ãã³ãã©ã€ã³ãŠãŒãã£ãªãã£ã§ãã ããã»ããµã«ãããã¹ãŠã®åœä»€ã»ããã«é¢ãã詳现æ
å ±ãæäŸããåè«çããã»ããµã«å²ãåœãŠãããŠãããã£ãã·ã¥ãã¬ããŒãããŸãã Core i5-3570Kã®äŸã次ã«ç€ºããŸãã
*--- Data Cache 0, Level 1, 32 KB, Assoc 8, LineSize 64
*--- Instruction Cache 0, Level 1, 32 KB, Assoc 8, LineSize 64
*--- Unified Cache 0, Level 2, 256 KB, Assoc 8, LineSize 64
**** Unified Cache 1, Level 3, 6 MB, Assoc 12, LineSize 64
-*-- Data Cache 1, Level 1, 32 KB, Assoc 8, LineSize 64
-*-- Instruction Cache 1, Level 1, 32 KB, Assoc 8, LineSize 64
-*-- Unified Cache 2, Level 2, 256 KB, Assoc 8, LineSize 64
--*- Data Cache 2, Level 1, 32 KB, Assoc 8, LineSize 64
--*- Instruction Cache 2, Level 1, 32 KB, Assoc 8, LineSize 64
--*- Unified Cache 3, Level 2, 256 KB, Assoc 8, LineSize 64
---* Data Cache 3, Level 1, 32 KB, Assoc 8, LineSize 64
---* Instruction Cache 3, Level 1, 32 KB, Assoc 8, LineSize 64
---* Unified Cache 4, Level 2, 256 KB, Assoc 8, LineSize 64
ããã§ã32 Kb L1ãã£ãã·ã¥ã32 Kb L1åœä»€ãã£ãã·ã¥ã256 Kb L2ãã£ãã·ã¥ãããã³6 Mb L3ãã£ãã·ã¥ã ãã®ã¢ãŒããã¯ãã£ã§ã¯ãL1ãšL2ãåã³ã¢ã«å²ãåœãŠãããL3ã¯ãã¹ãŠã®ã³ã¢ã§å
±æãããŸãã
AMD Jaguar CPUã®å Žåãåã³ã¢ã«ã¯å°çšã®L1ãã£ãã·ã¥ããããL2ã¯4ã³ã¢ã®ã°ã«ãŒã-ã¯ã©ã¹ã¿ãŒéã§å
±æãããŸãïŒJaguarã«ã¯L3ã¯ãããŸããïŒã
4ã³ã¢ã¯ã©ã¹ã¿ãŒïŒAMD JaguarïŒãã®ãããªã¯ã©ã¹ã¿ãŒã䜿çšããå Žåã¯ãç¹å¥ãªæ³šæãå¿
èŠã§ãã ã«ãŒãã«ããã£ãã·ã¥ã©ã€ã³ã«æžã蟌ããšãä»ã®ã«ãŒãã«ã§ã¯ç¡å¹ã«ãªããããã©ãŒãã³ã¹ãäœäžããå ŽåããããŸãã ããã«ããã®ãããªã¢ãŒããã¯ãã£ã§ã¯ããã¹ãŠãããã«æªåããå¯èœæ§ããããŸããã«ãŒãã«ã«ãã£ãŠåãã¯ã©ã¹ã¿ãŒã«ããæãè¿ãL2ããããŒã¿ãæœåºããã«
ã¯çŽ26ãµã€ã¯ã«ããã ãL2ããå¥ã®ã¯ã©ã¹ã¿ãŒãæœåºããã«
ã¯æ倧190ãµã€ã¯ã«ããããŸãã RAMããããŒã¿ãååŸããã®ã«å¹æµããŸãïŒ
AMD Jaguarã®ã¯ã©ã¹ã¿ãŒL2ã¬ã€ãã³ã·ãŒ-ãžã§ã€ãœã³ã°ã¬ãŽãªãŒãã£ãã·ã¥ã®äžè²«æ§ã®è©³çŽ°ã«ã€ããŠã¯ã
Cache Coherency Primerã®èšäºãåç
§ããŠãã ããã
アセンブラーの基本
x86-64ããããx64ãIA-64ãAMD64 ...ãŸãã¯x64ã¢ãŒããã¯ãã£ã®èªç
IntelãšAMDã¯ãç¬èªã®64ãããã¢ãŒããã¯ãã£ãéçºããŸããïŒAMD64ãšIA-64ã IA-64ã¯ãx86ã¢ãŒããã¯ãã£ããäœãç¶æ¿ããŠããªããšããæå³ã§ãx86-32ãããããã»ããµãšã¯èããç°ãªããŸãã x86ã§ã®ã¢ããªã±ãŒã·ã§ã³ã¯ããšãã¥ã¬ãŒã·ã§ã³ã¬ãã«ãéããŠIA-64ã§å®è¡ããå¿
èŠãããããããã®ã¢ãŒããã¯ãã£ã§ã¯ããã©ãŒãã³ã¹ãäœäžããŸãã x86ãšã®äºææ§ããªããããIA-64ã¯åçšåéãé€ããŠé¢éžããŸããã§ããã äžæ¹ãAMDã¯ããä¿å®çãªã¢ãŒããã¯ãã£ãäœæãã64ãããåœä»€ã®æ°ããã»ããã§x86ãæ¡åŒµããŸããã Intel
ã¯ã64ãããæŠäºã«æããåãæ¡åŒµæ©èœã
x86ããã»ããµã«å°å
¥ããããšãäœåãªããããŸããã ãã®ããŒãã§ã¯ãx86ã¢ãŒããã¯ãã£ãŸãã¯AMD64ãšãåŒã°ããx86-64ãããã«ã€ããŠèª¬æããŸãã
é·å¹Žã«ããããPCããã°ã©ããŒã¯x86ã¢ã»ã³ãã©ãŒã䜿çšããŠé«æ§èœã³ãŒããèšè¿°ããŠããŸããïŒ
mode'X ' ãCPUã¹ããã³ã°ãè¡çªããœãããŠã§ã¢ã©ã¹ã¿ã©ã€ã¶ãŒ...ãããã32ãããã³ã³ãã¥ãŒã¿ãŒã¯åŸã
ã«64ãããã³ã³ãã¥ãŒã¿ãŒã«çœ®ãæãããã
ã¢ã»ã³ãã©ãŒã³ãŒããå€æŽãããŸããã
äžéšã®åŠçãäœéã§ãä»ã®åŠçãé«éã§ããçç±ãç解ããã«ã¯ãã¢ã»ã³ãã©ãŒãç¥ãå¿
èŠããããŸãã ãŸããçµã¿èŸŒã¿
é¢æ°ã䜿çšããŠã³ãŒãã®éèŠãªéšåãæé©åããæ¹æ³ãããã³ãœãŒã¹ã³ãŒãã¬ãã«ã§ã®ãããã°ãæå³ããªããªããšãã«æé©åãããïŒããšãã°-O3ïŒã³ãŒãããããã°ããæ¹æ³ãç解ããã®ã«ã圹ç«ã¡ãŸãã
レジスタ
レジスタは、ほとんどゼロのレイテンシ（通常は1プロセッササイクル）を備えた非常に高速なメモリの小さな断片です。 内部プロセッサメモリとして使用されます。 プロセッサ命令によって直接処理されるデータを保存します。
x64ããã»ããµã«ã¯16åã®æ±çšã¬ãžã¹ã¿ïŒGPRïŒããããŸãã ãããã¯ç¹å®ã®ããŒã¿åã®ä¿åã«ã¯äœ¿çšããããå®è¡æã«ã¯ãªãã©ã³ããšã¢ãã¬ã¹ãå«ãŸããŸãã
x64ã§ã¯ã8ã€ã®x86ã¬ãžã¹ã¿ã64ãããã«æ¡åŒµããã8ã€ã®æ°ãã64ãããã¬ãžã¹ã¿ãè¿œå ãããŸãã 64ãããã®ã¬ãžã¹ã¿åã¯rã§å§ãŸããŸãã ããšãã°ã
eax ïŒ32ãããïŒã®64ãããæ¡åŒµã¯
raxãšåŒã°ããŸãã æ°ããã¬ãžã¹ã¿ã®ååã¯
r8ãã
r15ã«å€æŽãããŸããã
äžè¬çãªã¢ãŒããã¯ãã£ïŒsoftware.intel.comïŒx64ã¬ãžã¹ã¿ã«ã¯ä»¥äžãå«ãŸããŸãã
- 16åã®64ãããæ±çšã¬ãžã¹ã¿ïŒGPRïŒãæåã®8åã¯raxãrbxãrcxãrdxãrbpãrsiãrdiãrspãšåŒã°ããŸãã 2çªç®ã®8ïŒr8 â r15ã
- fpræµ®åå°æ°ç¹ã¬ãžã¹ã¿ïŒx87 FPUïŒãã«ããŒãã8ã€ã®64ãããMMXã¬ãžã¹ã¿ïŒMMXåœä»€ã®ã»ããïŒã
- 16åã®128ããããã¯ãã«XMMã¬ãžã¹ã¿ïŒSSEåœä»€ã®ã»ããïŒã
æ°ããããã»ããµã®å ŽåïŒ
- XMMã¬ãžã¹ã¿ãŒãæ¡åŒµãã256ãããYMMã¬ãžã¹ã¿ãŒïŒAVXåœä»€ã®ã»ããïŒã
- XMMã¬ãžã¹ã¿ãŒãæ¡åŒµãããã®æ°ã32ã«å¢ãã512ãããZMMã¬ãžã¹ã¿ãŒïŒAVX-512åœä»€ã®ã»ããïŒã
ZMMãYMMãããã³XMMã¬ãžã¹ã¿éã®é¢ä¿æŽå²çãªçç±ãããããã€ãã®GPRã¯ç°ãªãæ¹æ³ã§åŒã³åºãããŸãã ããšãã°ã
axã¯ã¬ãžã¹ã¿Accumulatorã
cx -Counterã
dx -Dataã§ããã çŸåšã
ããŒããŠã§ã¢ã¹ã¿ãã¯ã®ç®¡ççšã«äºçŽãããŠãã
rsp ïŒã¹ã¿ãã¯ãã€ã³ã¿ãŒïŒãš
rbp ïŒããŒã¹ãã€ã³ã¿ãŒïŒãé€ãããããã®ã»ãšãã©ã¯ç¹å®ã®ç®çã倱ã£ãŠã
ãŸã ïŒãã ãã
rbpã¯ãã°ãã°ãæé©åããããGRPãšããŠäœ¿çšãããŸã-ãã¬ãŒã ãã€ã³ã¿ãŒãçç¥ããŸãïŒ Clangã§ïŒã
x86ã¬ãžã¹ã¿ã®äžäœãããã«ã¯ã
ãµãã¬ãžã¹ã¿ã䜿çšããŠã¢ã¯ã»ã¹ã§ããŸãã æåã®8ã€ã®x86ã¬ãžã¹ã¿ã®å Žåãã¬ã¬ã·ãŒåã䜿çšãããŸãã æ°ããã¬ãžã¹ã¿ïŒr8 â r15ïŒã¯ãåããå¯äžã®åçŽåãããã¢ãããŒãã䜿çšããŸãã
ååä»ãã¹ã«ã©ãŒã¬ãžã¹ã¿ã¢ãã¬ãã·ã³ã°
ã¢ã»ã³ãã©ãŒåœä»€ã«2ã€ã®ãªãã©ã³ããå¿
èŠãªå Žåãéåžžãæåã®ãªãã©ã³ããå®å
ã§ã2çªç®ã®ãªãã©ã³ãããœãŒã¹ã§ãã ããããã«ã¯ãåŠçããå¿
èŠãããããŒã¿ããŸãã¯ããŒã¿ã®ã¢ãã¬ã¹ãå«ãŸããŠããŸãã 3ã€ã®äž»ãªã¢ãã¬ãã·ã³ã°ã¢ãŒãããããŸãã
- ããã«
- mov eaxã4 ; 4ãeaxã«ç§»åããŸã
- ç»é²ããç»é²ãž
- mov eaxãecx ; ecxã³ã³ãã³ããeaxã«ç§»åããŸã
- éæ¥çïŒ
- mov eaxã[ebx] ; 4ãã€ãïŒeaxãµã€ãºïŒãeaxã®ebxã¢ãã¬ã¹ã«ç§»åããŸã
- mov byte ptr [rcx]ã5 ; rcxã§5 ãã€ã移åããŸã
- mov rdxãdword ptr [rcx + 4 * rax] ; dwordãrcx + 4 * rdxã®raxã¢ãã¬ã¹ã«ç§»åããŸã
dword ptrã¯ããµã€ãºãã£ã¬ã¯ãã£ããšåŒã°ããŸãã åç
§ãããã¡ã¢ãªé åã®ãµã€ãºã«äžç¢ºå®æ§ãããå Žåãã¢ã»ã³ãã©ã«ã©ã®ãµã€ãºããšãããæ瀺ããŸãïŒäŸïŒ
mov [rcx] ã5ïŒãã€ããæžã蟌ãå¿
èŠããããŸããïŒDwordïŒïŒã
ããã¯ããã€ãïŒ8ãããïŒãã¯ãŒãïŒ16ãããïŒãdwordïŒ32ãããïŒãqwordïŒ64ãããïŒãxmmwordïŒ128ãããïŒãymmwordïŒ256ãããïŒãzmmwordïŒ512-ãããïŒã
SIMD命令セット
ã¹ã«ã©ãŒå®è£
ã¯ãäžåºŠã«1çµã®ãªãã©ã³ããæã€æŒç®ã瀺ããŸãã
ãã¯ãã«åã¯ãäžåºŠã«1ã€ã®ããŒã¿ãã£ã³ã¯ãåŠçãã代ããã«ãäžåºŠã«è€æ°ã®ãã£ã³ã¯ã®åŠçãéå§ãããšãã«ã¢ã«ãŽãªãºã ãå€æããããã»ã¹ã§ãïŒä»¥äžã§ãã®æ¹æ³ã説æããŸãïŒã
ææ°ã®ããã»ããµã¯ã䞊åããŒã¿åŠçã®ããã«
äžé£ã®SIMDåœä»€ ïŒãã¯ãã«åœä»€ïŒãå©çšã§ããŸãã
SIMDåŠçx86ããã»ããµãŒã§äœ¿çšå¯èœãªSIMDåœä»€ã»ããïŒ
- ãã«ãã¡ãã£ã¢æ¡åŒµïŒMMXïŒ
- ã¬ã¬ã·ãŒ 64ããããã¯ãã«ã¬ãžã¹ã¿ã«ããã¯ãããæŽæ°å€ã®ç®è¡æŒç®ããµããŒãããŸãã
- ã¹ããªãŒãã³ã°SIMDæ¡åŒµæ©èœïŒSSEïŒ
- 128ãããã®ãã¯ãã«ã¬ãžã¹ã¿ã«ããã¯ãããæµ®åå°æ°ç¹æ°ã®ç®è¡æŒç®ã æŽæ°ãšå粟床å€ã®ãµããŒããSSE2ã«è¿œå ãããŸããã
- Advanced Vector ExtensionsïŒAVXïŒ-x64ã®ã¿
- 256ãããã®ãã¯ãã«ã¬ãžã¹ã¿ã®ãµããŒããè¿œå ãããŸããã
- AVX-512-x64ã®ã¿
- 512ãããã®ãã¯ãã«ã¬ãžã¹ã¿ã®ãµããŒããè¿œå ãããŸããã
x64ããã»ããµã®ãã¯ãã«ã¬ãžã¹ã¿ã²ãŒã ãšã³ãžã³ã¯éåžžãå®è¡æéã®90ïŒ
ãã³ãŒãããŒã¹ã®å°ããªéšåã®èµ·åã«è²»ãããäž»ã«ããŒã¿ã®å埩ãšåŠçãè¡ããŸãã ãã®ãããªã·ããªãªã§ã¯ãSIMDã倧ããªéããçãå¯èœæ§ããããŸãã SSEåœä»€ã¯éåžžã128ãããã®ãã¯ãã«ã¬ãžã¹ã¿ã«ããã¯ããã4ã€ã®æµ®åå°æ°ç¹å€ã®ã»ããã®äžŠååŠçã«äœ¿çšãããŸãã
SSEã¯äž»ã«ãããŒã¿ã®åçŽè¡šçŸïŒé
åã®æ§é ãSoAïŒãšãã®åŠçã«çŠç¹ãåœãŠãŠããŸãã ããããäžè¬çã«ã
æ§é ã®é
åïŒAoSïŒãšæ¯èŒããSoAã®ããã©ãŒãã³ã¹ã¯ãã¡ã¢ãªã¢ã¯ã»ã¹ãã¿ãŒã³ã«äŸåããŸãã
- AoSã¯ããããæãèªç¶ãªãªãã·ã§ã³ã§ãç°¡åã«èšè¿°ã§ããŸãã OOPãã©ãã€ã ãæºãããŸãã
- ãã¹ãŠã®ã¡ã³ããŒãäžç·ã«ã¢ã¯ã»ã¹ãããå Žåã AoSã®ããŒã¿ã®å±ææ§ã¯åäžããŸãã
- SoAã¯ãããå€ãã®ãã¯ãã«åæ©èœïŒåçŽåŠçïŒãæäŸããŸãã
- SoAã¯ãé
åéã§ã®ã¿ããã£ã³ã°ã䜿çšããããšã«ãããå€ãã®å Žåããå°ãªãã¡ã¢ãªã䜿çšããŸãã
// Array Of Structures
struct Sphere
{
    float x;
    float y;
    float z;
    double r;
};
Sphere* AoS;

メモリレイアウト（構造体は8バイト境界に整列）：
------------------------------------------------------------------
| x | y | z | r | pad | x | y | z | r | pad | x | y | z | r | pad
------------------------------------------------------------------

// Structure Of Arrays
struct SoA
{
    float* x;
    float* y;
    float* z;
    double* r;
    size_t size;
};

メモリレイアウト：
------------------------------------------------------------------
| x | x | x ..| pad | y | y | y ..| pad | z | z | z ..| pad | r..
------------------------------------------------------------------
AVXã¯SSEã®èªç¶ãªæ¡åŒµã§ãã ãã¯ãã«ã¬ãžã¹ã¿ã®ãµã€ãºã¯256ãããã«å¢å ããŸããã€ãŸããæ倧8ã€ã®æµ®åå°æ°ç¹æ°ã䞊åã«ããã¯ããã³åŠçã§ããŸãã Intelããã»ããµã¯æåã«256ãããã®ã¬ãžã¹ã¿ããµããŒãããŠãããAMDã«åé¡ãããå¯èœæ§ããããŸãã ãã«ããŒã¶ãŒããžã£ã¬ãŒãªã©ã®AMDã®åæã®AVXããã»ããµã¯ã256ãããã®æäœã128ãããã®ãã¢ã«å解ãããããSSEãšæ¯èŒããŠã¬ã€ãã³ã·ãå¢å ããŸãã
çµè«ãšããŠãAVXã®ã¿ïŒã³ã³ãã¥ãŒã¿ãŒãIntelã§å®è¡ãããŠããå Žåã¯å
éšããŒã«çšïŒã«å°å¿µããããšã¯ããã»ã©ç°¡åã§ã¯ãªããAMDããã»ããµãŒã¯ã»ãšãã©ã®å Žåããã€ãã£ãã«ãµããŒãããŠããŸããã äžæ¹ãx64ããã»ããµã§ã¯ãSSE2ã«ã¢ããªãªãªã«äŸåã§ããŸãïŒããã¯ä»æ§ã®äžéšã§ãïŒã
アウトオブオーダー実行
ããã»ããµã®ããã»ããµãã€ãã©ã€ã³ãã¢ãŠããªããªãŒããŒïŒOoOïŒã¢ãŒãã§å®è¡ãããŠãããå¿
èŠãªå
¥åããŒã¿ãå©çšã§ããªãããã«åœä»€ã®å®è¡ãé
ããŠããå Žåãããã»ããµã¯å
¥åããŒã¿ãæºåãããŠããåŸã®åœä»€ãèŠã€ããããšããŸãé çªã«æåã«å®è¡ããŸãã
åœä»€ã®å®è¡ãµã€ã¯ã«ïŒåœä»€ãµã€ã¯ã«ïŒïŒãŸãã¯ãåä¿¡-ãã³ãŒã-å®è¡ããµã€ã¯ã«ïŒã¯ãããã»ããµãã¡ã¢ãªããåœä»€ãåä¿¡ããå®è¡ããå¿
èŠããããã®ã決å®ããå®è¡ããããã»ã¹ã§ãã ç°åžžãªå®è¡ã¢ãŒãã§ã®ã³ãã³ãå®è¡ã®ãµã€ã¯ã«ã¯æ¬¡ã®ããã«ãªããŸãã
- åä¿¡/ãã³ãŒãïŒåœä»€ã¯L1IïŒåœä»€ãã£ãã·ã¥ïŒããæœåºãããŸãã 次ã«ãmicrooperationsãŸãã¯ÂµopsãšåŒã°ããå°ããªæäœã«å€æããŸãã
- ååã®å€æŽïŒã¬ãžã¹ã¿ãšããŒã¿éã®æ¢åã®äŸåé¢ä¿ã«ãã ãå®è¡ããã¯ãçºçããå ŽåããããŸãã ãã®åé¡ã解決ãã誀ã£ãäŸåé¢ä¿ãæé€ããããã«ãããã»ããµã¯å®éã®èšç®ã«äœ¿çšãããäžé£ã®ååã®ãªãå
éšã¬ãžã¹ã¿ãæäŸããŸãã ã¬ãžã¹ã¿åã®å€æŽã¯ã ã¢ãŒããã¯ãã£ã¬ãžã¹ã¿ ïŒè«çïŒãžã®åç
§ãååã®ãªãã¬ãžã¹ã¿ ïŒç©çïŒãžã®ãªã³ã¯ã«å€æããããã»ã¹ã§ãã
- ãªãªãŒããŒãããã¡ïŒåä¿¡æžã¿ã®é åºã§ä¿åãããŠããä¿çäžã®ãã€ã¯ãæäœãšããã§ã«å®äºããŠããããŸã å»æ£ãããŠããªããã®ãå«ãŸããŸãã
- ãã£ã¹ãããã³ã°ïŒäžŠã¹æ¿ããããã¡ãŒã«æ ŒçŽããããã€ã¯ãæäœã¯ãäŸåé¢ä¿ãšããŒã¿ã®å¯çšæ§ãèæ
®ããŠãä»»æã®é åºã§äžŠåå®è¡ã¢ãžã¥ãŒã«ã«è»¢éã§ããŸãã ãã€ã¯ããªãã¬ãŒã·ã§ã³ã®çµæã¯ããã€ã¯ããªãã¬ãŒã·ã§ã³èªäœãšãšãã«äžŠã¹æ¿ããããã¡ã«æžãæ»ãããŸãã
- 解éïŒãªã¿ã€ã¢ã¡ã³ããŠãããã¯ããããã¡å
ã®ãã€ã¯ããªãã¬ãŒã·ã§ã³ã®ã¹ããŒã¿ã¹ãåžžã«ãã§ãã¯ããå®è¡ããããã€ã¯ããªãã¬ãŒã·ã§ã³ã®çµæãã¢ãŒããã¯ãã£ã¬ãžã¹ã¿ïŒãŠãŒã¶ãŒãã¢ã¯ã»ã¹å¯èœïŒã«æžã蟌ã¿ããããã¡ãããã€ã¯ããªãã¬ãŒã·ã§ã³ãåé€ããŸãã
AMD Jaguarããã»ããµã¢ãŒããã¯ãã£AMD Jaguarããã»ããµã¢ãŒããã¯ãã£ã§ã¯ãäžèšã®ãã¹ãŠã®ãããã¯ãæ€åºã§ããŸãã æŽæ°ã³ã³ãã¢ã®å ŽåïŒ
- ãROMã®ãã³ãŒããšãã€ã¯ãã³ãŒãã
- =åä¿¡/ãã³ãŒãã¢ãžã¥ãŒã«
- ãInt Renameãããã³ãInt PRFãïŒç©çã¬ãžã¹ã¿ãã¡ã€ã«ïŒ
- =ã¢ãžã¥ãŒã«ã®ååå€æŽ
- ããã«ç€ºãããŠããªããªã¿ã€ã¢ã³ã³ãããŒã«ãŠãããïŒRCUïŒã¯ãã¬ãžã¹ã¿ã®ååå€æŽãšãã€ã¯ããªãã¬ãŒã·ã§ã³ã®åé€ãå¶åŸ¡ããŸãã
- ãã£ã¹ãããã£
- å
éšã¹ã±ãžã¥ãŒã©ïŒALUïŒ
- 1ã€ã®ãã€ã¯ãæäœãç°åžžãªé åºã§ã³ã³ãã¢ïŒå®è¡I0ããã³I1ã®2ã€ã®ALUã¢ãžã¥ãŒã«ïŒã«éä¿¡ã§ããŸãã
- AGU-managerïŒããŒã/ã¹ãã¬ãŒãžïŒ
- 1ã€ã®ãã€ã¯ãæäœãç°åžžãªé åºã§ã³ã³ãã€ãŒïŒ2ã€ã®AGUã¢ãžã¥ãŒã«ã®å®è¡LAGU b SAGUïŒã«è»¢éã§ããŸãã
ãã€ã¯ããªãã¬ãŒã·ã§ã³ã®äŸïŒ
命令               | µops
add reg, reg       | 1: add
add reg, [mem]     | 2: load, add
addpd xmm, xmm     | 1: addpd
addpd xmm, [mem]   | 2: load, addpd
Agner Webãµã€ãã®æ瀺ã®ãã°ããã
è¡šã®AMD Jaguarã»ã¯ã·ã§ã³ãèŠããšããã®ã³ãŒãã®å®è¡ãã€ãã©ã€ã³ãã©ã®ããã«èŠããããç解ã§ããŸãã
mov eax, [mem1]   ; 1 - load
imul eax, 5       ; 2 - mul
add eax, [mem2]   ; 3 - load, add
mov [mem3], eax   ; 4 - store

(Jaguar)
I0    | I1    | LAGU   | SAGU    | FP0 | FP1
      |       | 1-load |         |     |
2-mul |       | 3-load |         |     |
      | 3-add |        |         |     |
      |       |        | 4-store |     |
ããã§ããã€ã¯ããªãã¬ãŒã·ã§ã³ã§åœä»€ãå£ãããšã«ãããããã»ããµãŒã¯äžŠåå®è¡ã¢ãžã¥ãŒã«ãå©çšããåœä»€ãå®è¡ããããšãã«é
延ãéšåçãŸãã¯å®å
šã«ãé ããããšãã§ããŸãïŒ
3-load
ã€ã®ç°ãªãã¢ãžã¥ãŒã«ã§
3-load
ãš
2-mul
ã䞊åã«å®è¡ãããŸãïŒã
ããããããã¯åžžã«å¯èœãšã¯éããŸããã
2-mul
ã
3-add
ã
4-store
éã®äŸåé¢ä¿ã®ãã§ãŒã³ã«ãããããã»ããµã¯ãããã®ãã€ã¯ããªãã¬ãŒã·ã§ã³ãåç·šæã§ããŸããïŒ
4-store
ã«ã¯
3-add
çµæãå¿
èŠã§ã
3-add
ã¯
2-mul
çµæãå¿
èŠã§ãïŒã ãããã£ãŠã䞊åå®è¡ã¢ãžã¥ãŒã«ãå¹æçã«äœ¿çšããã«ã¯ãäŸåé¢ä¿ã®é·ããã§ãŒã³ãé¿ããŠãã ããã
Visual Studioオプション
ã³ã³ãã€ã©ãŒã«ãã£ãŠçæãããã¢ã»ã³ãã©ãŒã説æããããã«ãmsvc ++ 14.0ïŒVS2015ïŒãšClangã䜿çšããŸãã åãããšãè¡ããç°ãªãã³ã³ãã€ã©ã®æ¯èŒã«æ
£ããããšã匷ããå§ãããŸãã ããã«ãããã·ã¹ãã ã®ãã¹ãŠã®ã³ã³ããŒãã³ããçžäºã«ã©ã®ããã«çžäºäœçšããããããããç解ããçæãããã³ãŒãã®å質ã«ã€ããŠå€æããã®ã«åœ¹ç«ã¡ãŸãã
ããã€ãã®æçšæ§ïŒ- [ã·ã³ãã«åã®è¡šç€º]ãªãã·ã§ã³ã¯ãåœä»€ã¢ãã¬ã¹ãŸãã¯ã¹ã¿ãã¯ã¢ãã¬ã¹ã®ä»£ããã«ãããŒã«ã«å€æ°ãšé¢æ°ã®ååãéã¢ã»ã³ãã«åœ¢åŒã§è¡šç€ºã§ããŸãã
- ã¢ã»ã³ãã©ãŒãããèªã¿ãããããŸãã
- ãããžã§ã¯ãèšå®> C / C ++>ã³ãŒãçæ>åºæ¬ã©ã³ã¿ã€ã ãã§ã㯠ãå€ãããã©ã«ãã«å€æŽããŸãã
- çµæã.asmãã¡ã€ã«ã«æžã蟌ã¿ãŸãã
- ãããžã§ã¯ãèšå®> C / C ++>åºåãã¡ã€ã«>ã¢ã»ã³ãã©åºå ãå€ãã¢ã»ã³ããªãšãœãŒã¹ã³ãŒãã«èšå®ããŸãã
- ãã¬ãŒã ãã€ã³ã¿ãŒãçç¥ãããšïŒãã¬ãŒã ãã€ã³ã¿ãŒã®çç¥ïŒãã³ã³ãã€ã©ãŒã¯ebpã䜿çšããŠã¹ã¿ãã¯ãå¶åŸ¡ããªãããã«æ瀺ãããŸãã
- / OyïŒx86ã®ã¿ãClangïŒ-fomit-frame-pointerãx64ã§åäœïŒ
基本的な分解例
ããã§ã¯ãéåžžã«åçŽãªC ++ã³ãŒãã®äŸãšãã®éã¢ã»ã³ããªãèŠãŠãããŸãã ãã¹ãŠã®ã¢ã»ã³ãã©ã³ãŒãã¯åç·šæãããå®å
šã«ããã¥ã¡ã³ãåãããŠãããããåå¿è
ã§ãç°¡åã«äœ¿çšã§ããŸãããåœä»€ã®åäœã«ã€ããŠçåããããã©ããã
確èªããããšããå§ãããŸãã
ç¥èŠãç°¡åã«ããããã«ãæ©èœã®ããããŒã°ãšãšãããŒã°ã¯åé€ãããŠããŸãããããã§ã¯ãããã«ã€ããŠã¯èª¬æããŸããã
泚ïŒããŒã«ã«å€æ°ã¯ã¹ã¿ãã¯ã§å®£èšãããŸãã ããšãã°ã
mov dword ptr [rbp + 4]ã0Ah; int b = 10ã¯ãããŒã«ã«å€æ° 'b'ãçžå¯Ÿã¢ãã¬ã¹ïŒãªãã»ããïŒ4ã§ã¹ã¿ãã¯ã«ããã·ã¥ããïŒrbpãåç
§ããïŒã0AhãŸãã¯10é²æ°ã§10ã«åæåãããããšãæå³ããŸãã
åçŽãªç²ŸåºŠã®æµ®åå°æ°ç¹æŒç®ç®è¡æµ®åå°æ°ç¹æŒç®ã¯ãx87 FPUïŒ80ããã粟床ãã¹ã«ã©ãŒïŒãŸãã¯SSEïŒ32ããããŸãã¯64ããã粟床ããã¯ãã«åïŒã䜿çšããŠå®è¡ã§ããŸãã X64ã¯åžžã«SSE2åœä»€ã®ã»ããããµããŒããã
ããã©ã«ãã§ã¯æµ®åå°æ°ç¹æŒç®ã«äœ¿çšã
ããŸã ã
SSEã䜿çšããåçŽãªç®è¡æµ®åå°æ°ç¹æŒç®ã msvc ++åæå- movss xmm0 ãdword ptr [adr]; xmm0ã®adrã«ããæµ®åå°æ°ç¹å€ãããŒãããŸã
- movss dword ptr [rbp]ãxmm0 ; ã¹ã¿ãã¯ã«ä¿åããŸãïŒfloat xïŒ
- ...; yããã³zãšåã
x * xãèšç®ããŸã- movss xmm0 ãdword ptr [rbp] ; ã¹ã«ã©ãŒxãxmm0ã«ããŒãããŸã
- mulss xmm0 ãdword ptr [rbp] ; xmm0ïŒ= xïŒã«xãæãã
y * yãèšç®ããx * xãå ç®ããŸã- movss xmm1 ãdword ptr [rbp + 4] ; xmm1ã«ã¹ã«ã©ãŒyãããŒãããŸã
- mulss xmm1 ãdword ptr [rbp + 4] ; xmm1ïŒ= yïŒã«yãæãã
- xmm0ãxmm1ãè¿œå ããŸãã xmm1ïŒy * yïŒã«xmm0ïŒx * xïŒãè¿œå ããŸã
z * zãèšç®ããx * x + y * yãå ç®ããŸã- movss xmm1 ãdword ptr [rbp + 8]; xmm1ã«ã¹ã«ã©ãŒzãããŒãããŸã
- mulss xmm1 ãdword ptr [rbp + 8]; xmm1ïŒ= zïŒã«zãæãã
- xmm0ãxmm1ãè¿œå ããŸãã xmm0ïŒx * x + y * yïŒã«xmm1ïŒz * zïŒãè¿œå ããŸã
æçµçµæãä¿åããŸãã- movss dword ptr [ rbp + 0Ch]ã xmm0 ; xmm0ãçµæã«ä¿åããŸã
- xor eaxãeax ; eax =0ãeaxã«ã¯mainïŒïŒã®æ»ãå€ãå«ãŸããŸã
ãã®äŸã§ã¯ãXMMã¬ãžã¹ã¿ã䜿çšããŠåäžã®æµ®åå°æ°ç¹å€ãæ ŒçŽããŸãã SSEã䜿çšãããšãç°ãªãããŒã¿åã§ãåäžã®å€ãšè€æ°ã®å€ã®äž¡æ¹ãæäœã§ããŸãã SSEã®è¿œå ã¹ããŒãã¡ã³ããèŠãŠãã ããã
- xmm0ãxmm1ãè¿œå ããŸãã 1ã€ã®ã¹ã«ã©ãŒå粟床浮åå°æ°ç¹å€ãšããŠã®åã¬ãžã¹ã¿ïŒ s calar s ingle粟床ã®æµ®åå°æ°ç¹å€ïŒ
- addps xmm0ãxmm1 ; 4ã€ã®ããã¯ãããå粟床浮åå°æ°ç¹å€ãšããŠã®åã¬ãžã¹ã¿ïŒããã¯ãããs ingle粟床ã®æµ®åå°æ°ç¹å€ïŒ
- xmm0ãxmm1ãè¿œå ã 1ã€ã®ã¹ã«ã©ãŒå粟床浮åå°æ°ç¹å€ãšããŠã®åã¬ãžã¹ã¿ãŒïŒ sã«ã©ãŒdå粟床浮åå°æ°ç¹å€ïŒ
- addpd xmm0ãxmm1 ; 2ã€ã®ããã¯ãããå粟床浮åå°æ°ç¹å€ãšããŠã®åã¬ãžã¹ã¿ãŒïŒããã¯ãããããã«ç²ŸåºŠã®æµ®åå°æ°ç¹å€ïŒ
- paddd xmm0ãxmm1 ; åã¬ãžã¹ã¿ãŒã¯ã4ã€ã®ããã¯ãããDWORDå€ïŒããã¯ãããDOUBLEã¯ãŒãïŒ32ãããæŽæ°ïŒå€ïŒ
åå²åå²ã®äŸã msvc ++åæå- mov dword ptr [ rbp ]ã5; 5ãã¹ã¿ãã¯ã«ä¿åããŸãïŒæŽæ°aïŒ
- mov dword ptr [ rbp +4]ã0Ah; ã¹ã¿ãã¯ã«10ãä¿åããŸãïŒæŽæ°bïŒ
- mov dword ptr [ rbp +8]ã0; ã¹ã¿ãã¯ã«0ãä¿åããŸãïŒæŽæ°ã®çµæïŒ
ç¶æ
- mov eax ãdword ptr [ rbp +4]; bãeaxã«ããŒãããŸã
- cmp dword ptr [ rbp ]ã eax ; aãšeaxïŒbïŒãæ¯èŒããŸã
- jge @ ECF81536; aãb以äžã®å Žåã«é·ç§»ããŸã
'then'çµæ= a- mov eax ãdword ptr [ rbp ]; AXãèªã¿èŸŒã¿ãŸã
- mov dword ptr [ rbp +8]ã eax ; eaxãã¹ã¿ãã¯ã«ä¿åããŸãïŒçµæïŒ
- jmp @ ECF8153C; ECF8153Cã«è¡ããŸã
'else'çµæ= b- ïŒECF81536ïŒmov eax ãdword ptr [ rbp +4]; bãeaxã«ããŒãããŸã
- mov dword ptr [rbp + 8]ãeax; eaxãã¹ã¿ãã¯ã«ä¿åããŸãïŒçµæïŒ
- ïŒECF8153CïŒ xor eaxãeax; eax =0ãeaxã«ã¯mainïŒïŒã®æ»ãå€ãå«ãŸããŸã
cmpåœä»€ã¯ãæåã®ãœãŒã¹ã®ãªãã©ã³ãã2çªç®ã®ãœãŒã¹ãšæ¯èŒããçµæã«åŸã£ãŠ
RFLAGSã¬ãžã¹ã¿ã®
ã¹ããŒã¿ã¹ãã©ã°ã
èšå®ããŸã ã
®FLAGSã¬ãžã¹ã¿ã¯ãããã»ããµã®çŸåšã®ç¶æ
ãå«ãx86ããã»ããµã¹ããŒã¿ã¹ã¬ãžã¹ã¿ã§ãã
cmpåœä»€ã¯éåžžãæ¡ä»¶
åå² ïŒäŸïŒ
jge ïŒãšçµã¿åãããŠäœ¿çšââãããŸãã é·ç§»ã§äœ¿çšãããæ¡ä»¶ã³ãŒãã¯ã
cmpåœä»€ã®çµæã«äŸåããŸãïŒ
RFLAGSæ¡ä»¶ã³ãŒãïŒã
æŽæ°ãšãforãã«ãŒãã䜿çšããç®è¡æŒç®ã¢ã»ã³ãã©ãŒã§ã¯ãã«ãŒãã¯äž»ã«äžé£ã®æ¡ä»¶åå²ïŒ= if ... gotoïŒãšããŠè¡šãããŸãã
æŽæ°ãšãforãã«ãŒãã䜿çšããç®è¡æŒç®ã msvc ++åæå- mov dword ptr [ rbp ]ã0; ã¹ã¿ãã¯ã«0ãä¿åããŸãïŒæŽæ°ã®åèšïŒ
- mov dword ptr [k]ã0Ah; ã¹ã¿ãã¯ã«10ãä¿åããŸãïŒæŽæ°kïŒ
- mov dword ptr [ rbp +8]ã0; ã«ãŒãã§ã®å埩ã®ããã«ã¹ã¿ãã¯ïŒæŽæ°iïŒã«0ãä¿åããŸã
- jmp main + 30h; ã¡ã€ã³+ 30æéã«ãªããŸã
iã®ã€ã³ã¯ãªã¡ã³ããæ
åœããã³ãŒãã®éšå- ïŒã¡ã€ã³+ 28hïŒmov eax ãdword ptr [ rbp +8]; iãeaxã«ããŒãããŸã
- inc eax ; å¢å
- mov dword ptr [ rbp +8]ã eax ; ã¹ã¿ãã¯ã«ä¿åããŸã
çµäºæ¡ä»¶ã®ãã¹ããæ
åœããã³ãŒãã®éšåïŒi> = kïŒ- ïŒã¡ã€ã³+ 30æéïŒmov eax ãdword ptr [k]; eaxã®ã¹ã¿ãã¯ããkãããŒãããŸã
- cmp dword ptr [ rbp +8]ã eax ; iãeaxïŒ= kïŒãšæ¯èŒããŸã
- jge main + 47h; iãk以äžã®å Žåãé·ç§»ãè¡ããŸãïŒãµã€ã¯ã«ãå®äºããŸãïŒ
ãå®éã®äœæ¥ãïŒsum + = i- mov eax ãdword ptr [ rbp +8]; iãeaxã«ããŒãããŸã
- mov ecx ãdword ptr [ rbp ]; ecxã«éãããŒãããŸã
- ecxãeaxãè¿œå ããŸãã eaxãšecxãã¹ã¿ãã¯ããŸãïŒecx = sum + iïŒ
- mov eaxãecx ; ecxãeaxã«è»¢éããŸã
- mov dword ptr [rbp]ãeax ; eaxïŒéïŒãã¹ã¿ãã¯ã«ä¿åããŸã
- jmp main + 28h; é·ç§»ãè¡ããã«ãŒãã®æ¬¡ã®å埩ãåŠçããŸã
- ïŒã¡ã€ã³+ 47hïŒxor eaxãeax ; eax =0ãeaxã«ã¯mainïŒïŒã®æ»ãå€ãå«ãŸããŸãã
SSEçµã¿èŸŒã¿é¢æ°, SSE ( â ). ,
:
- _mm_mul_ps → mulps
- _mm_load_ps → movaps
- _mm_add_ps → addps
- _mm_store_ps → movaps
SSE, msvc++(xmmword 128 dword)- (main+340h) movaps xmm1 , xmmword ptr [rdx+rax] ; 128- xmmword ( ) xs+i xmm1
- movaps xmm3 , xmmword ptr [rax] ; 4 ys+i xmm3
- movaps xmm0 , xmmword ptr [r8+rax] ; 4 zs+i xmm0
- movaps xmm2 , xmmword ptr [r9+rax] ; 4 ws+i xmm2
dot(v[i], A) = xi * Ax + yi * Ay + zi * Az + wi * Aw , (vertices) :- mulps xmm1, xmm4 ; xmm1 *= xmm4 xn.Ax, n [0..3]
- mulps xmm3, xmm5 ; xmm3 *= xmm5 yn.Ay, n [0..3]
- mulps xmm0, xmm6 ; xmm0 *= xmm6 zn.Az, n [0..3]
- mulps xmm2, xmm7 ; xmm2 *= xmm7 wn.Aw, n [0..3]
- addps xmm3, xmm1 ; xmm3 += xmm1 xn.Ax + yn.Ay
- addps xmm2, xmm0 ; xmm2 += xmm0 zn.Az + wn.Aw
- addps xmm2, xmm3 ; xmm2 += xmm3 xn.Ax + yn.Ay + zn.Az + wn.Aw
( + )- movaps xmmword ptr [r10 + rax], xmm2 ; 128- xmmword (4 ) , r10+rax
- add rax , 10h; 16 rax ( = 4 )
- sub r11,1 ; r11â,
- jne main+340h; r11 != 0 の間、ループ先頭（main+340h）に戻ります
同じ処理のAVX版（256ビット、一度に8個のfloat）：
__m256 Ax = _mm256_broadcast_ss(A);
...
for (int i = 0; i < vertexCount; i += 8) // 8 floats (256 bits) per iteration
{
    __m256 x4 = _mm256_load_ps(xs + i);
    ..
    __m256 dx = _mm256_mul_ps(Ax, x4);
    ..
    __m256 a0 = _mm256_add_ps(dx, dy);
    ..
    _mm256_store_ps(results + i, dots);
}
(switch). msvc++- mov dword ptr [ rbp ], 0; 0 ( )
- mov eax , dword ptr [argc]; argc eax
- mov dword ptr [ rbp +44h], eax ;
- cmp dword ptr [ rbp +44h], 0; argc to 0
- je main+38h; if argc == 0, main+38h (case 0)
- cmp dword ptr [ rbp +44h], 1; argc 1
- je main+41h; if argc == 1, main+41h (case 1)
- cmp dword ptr [ rbp +44h], 2; compare argc to 2
- je main+4Ah; if argc == 2, main+4Ah (case 2)
- cmp dword ptr [ rbp +44h], 3; argc 3
- je main+53h; if argc == 3, main+53h (case 3)
- jmp main+5Ch; main+5Ch ( )
Case 0- (main+38h) mov dword ptr [ rbp ], 1; 1 (val)
- jmp main+63h; main+63h,
Case 1- (main+41h) mov dword ptr [ rbp ], 3; 3 (val)
- jmp main+63h; main+63h,
...
- (main+63h) xor eax, eax ; eax = 0. eax main()
. ++- if-else, . .