SDL2:快速像素操作
SDL2: Fast Pixel Manipulation
我想在显示器上绘制像素,这些像素会根据某些参数频繁变化。例如,如果红色像素和绿色像素发生碰撞,它们都会消失,等等。
我必须在每一帧中处理大约 100 - 1000 个像素。我目前使用的是一种多线程方法,但它达不到我想要的 30 FPS。目前,我在 RAM 中保存了一个包含所有像素的像素数组,同时还有一个 SDL_Surface。
当数组中的某个像素发生变化时,Surface 中对应的像素也会随之改变,所有操作完成后再将 Surface blit 到屏幕上。我目前的方法太慢了,因此我在考虑如何提高速度。
我目前的想法是:
- 使用 OpenGL 直接在 GPU 上进行像素操作;但一些论坛告诉我,这会比我目前的方法慢得多,理由是 "this is not how a GPU works"(GPU 不是这样工作的)
- 不存储像素数组,而是直接将 BMP 存储在 RAM 中,对其进行操作,然后将其移动到 SDL_Surface 或 SDL_Texture
有没有其他方法可以快速操作像素?
使用 SDL_TEXTUREACCESS_STREAMING 创建纹理的 SDL_CreateTexture(),再配合 SDL_UpdateTexture(),在像素格式选择正确的情况下似乎工作得很好。
在我的系统上使用默认渲染器:
Renderer name: direct3d
Texture formats:
SDL_PIXELFORMAT_ARGB8888
SDL_PIXELFORMAT_YV12
SDL_PIXELFORMAT_IYUV
(不过 opengl 渲染器给出的信息也相同:)
Renderer name: opengl
Texture formats:
SDL_PIXELFORMAT_ARGB8888
SDL_PIXELFORMAT_YV12
SDL_PIXELFORMAT_IYUV
使用 SDL_PIXELFORMAT_ARGB8888 时,我得到约 1 ms/frame:
// g++ main.cpp `pkg-config --cflags --libs sdl2`
#include <SDL.h>
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <chrono>
// Counts frames and periodically reports the average frame rate.
//
// Call exactly once per rendered frame. Every call increments a frame counter;
// once more than `period` seconds have elapsed since the last report (or since
// the first call), a summary line is written to `os` and the counter and the
// reference time are reset.
//
// os      stream that receives the report (defaults to std::cout)
// period  minimum number of seconds between two reports
//
// The stream's formatting flags and precision are restored after printing so
// the std::fixed / std::setprecision used here do not leak into unrelated
// output written to the same stream later.
//
// The counter and the reference time are function-local statics: all callers
// share one tally regardless of which stream they pass.
void PrintFrameTiming(std::ostream& os = std::cout, float period = 2.0f)
{
    static unsigned int frames = 0;
    frames++;

    // Initialised on the first call only; later calls measure against it.
    static auto start = std::chrono::steady_clock::now();
    auto end = std::chrono::steady_clock::now();

    float seconds = std::chrono::duration_cast< std::chrono::duration<float> >(end - start).count();
    if( seconds > period )
    {
        // `frames` is at least 1 here because it is incremented above.
        float spf = seconds / frames;

        // Remember the caller's formatting so it can be put back afterwards.
        const std::ios_base::fmtflags savedFlags = os.flags();
        const std::streamsize savedPrecision = os.precision();

        os
            << frames << " frames in "
            << std::setprecision(1) << std::fixed << seconds << " seconds = "
            << std::setprecision(1) << std::fixed << 1.0f / spf << " FPS ("
            << std::setprecision(3) << std::fixed << spf * 1000.0f << " ms/frame)\n";

        os.flags( savedFlags );
        os.precision( savedPrecision );

        // Start a new measurement window.
        frames = 0;
        start = end;
    }
}
int main( int, char** )
{
SDL_Init( SDL_INIT_EVERYTHING );
SDL_Window* window = SDL_CreateWindow( "SDL", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 600, 600, SDL_WINDOW_SHOWN );
SDL_Renderer* renderer = SDL_CreateRenderer( window, -1, SDL_RENDERER_ACCELERATED );
SDL_SetHint( SDL_HINT_RENDER_SCALE_QUALITY, "1" );
// dump renderer info
SDL_RendererInfo info;
SDL_GetRendererInfo( renderer, &info );
std::cout << "Renderer name: " << info.name << '\n';
std::cout << "Texture formats: " << '\n';
for( Uint32 i = 0; i < info.num_texture_formats; i++ )
{
std::cout << SDL_GetPixelFormatName( info.texture_formats[i] ) << '\n';
}
// create texture
const unsigned int texWidth = 1024;
const unsigned int texHeight = 1024;
SDL_Texture* texture = SDL_CreateTexture( renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, texWidth, texHeight );
std::vector< unsigned char > pixels( texWidth * texHeight * 4, 0 );
bool useLocktexture = false;
// main loop
bool running = true;
while( running )
{
SDL_SetRenderDrawColor( renderer, 0, 0, 0, SDL_ALPHA_OPAQUE );
SDL_RenderClear( renderer );
// handle events
SDL_Event ev;
while( SDL_PollEvent( &ev ) )
{
if( ( SDL_QUIT == ev.type ) ||
( SDL_KEYDOWN == ev.type && SDL_SCANCODE_ESCAPE == ev.key.keysym.scancode ) )
{
running = false;
break;
}
if( SDL_KEYDOWN == ev.type && SDL_SCANCODE_L == ev.key.keysym.scancode )
{
useLocktexture = !useLocktexture;
std::cout << "Using " << ( useLocktexture ? "SDL_LockTexture() + std::copy_n()" : "SDL_UpdateTexture()" ) << '\n';
}
}
// splat down some random pixels
for( unsigned int i = 0; i < 1000; i++ )
{
const unsigned int x = rand() % texWidth;
const unsigned int y = rand() % texHeight;
const unsigned int offset = ( texWidth * y * 4 ) + x * 4;
pixels[ offset + 0 ] = rand() % 256; // b
pixels[ offset + 1 ] = rand() % 256; // g
pixels[ offset + 2 ] = rand() % 256; // r
pixels[ offset + 3 ] = SDL_ALPHA_OPAQUE; // a
}
// update texture
if( useLocktexture )
{
unsigned char* lockedPixels = nullptr;
int pitch = 0;
SDL_LockTexture( texture, nullptr, reinterpret_cast< void** >( &lockedPixels ), &pitch );
std::copy_n( pixels.data(), pixels.size(), lockedPixels );
SDL_UnlockTexture( texture );
}
else
{
SDL_UpdateTexture( texture, nullptr, pixels.data(), texWidth * 4 );
}
SDL_RenderCopy( renderer, texture, nullptr, nullptr );
SDL_RenderPresent( renderer );
PrintFrameTiming();
}
SDL_DestroyRenderer( renderer );
SDL_DestroyWindow( window );
SDL_Quit();
return 0;
}
确保你没有启用 vsync(例如在驱动程序中强制启用、正在运行合成器等),否则你所有的帧时间都会是约 16 ms(或者你的显示器刷新率所对应的时间)。
我想在显示器上绘制像素,这些像素会根据某些参数频繁变化。例如,如果红色像素和绿色像素发生碰撞,它们都会消失,等等。
我必须在每一帧中处理大约 100 - 1000 个像素。我目前使用的是一种多线程方法,但它达不到我想要的 30 FPS。目前,我在 RAM 中保存了一个包含所有像素的像素数组,同时还有一个 SDL_Surface。
当数组中的某个像素发生变化时,Surface 中对应的像素也会随之改变,所有操作完成后再将 Surface blit 到屏幕上。我目前的方法太慢了,因此我在考虑如何提高速度。
我目前的想法是:
- 使用 OpenGL 直接在 GPU 上进行像素操作;但一些论坛告诉我,这会比我目前的方法慢得多,理由是 "this is not how a GPU works"(GPU 不是这样工作的)
- 不存储像素数组,而是直接将 BMP 存储在 RAM 中,对其进行操作,然后将其移动到 SDL_Surface 或 SDL_Texture
有没有其他方法可以快速操作像素?
使用 SDL_TEXTUREACCESS_STREAMING 创建纹理的 SDL_CreateTexture(),再配合 SDL_UpdateTexture(),在像素格式选择正确的情况下似乎工作得很好。
在我的系统上使用默认渲染器:
Renderer name: direct3d
Texture formats:
SDL_PIXELFORMAT_ARGB8888
SDL_PIXELFORMAT_YV12
SDL_PIXELFORMAT_IYUV
(不过 opengl 渲染器给出的信息也相同:)
Renderer name: opengl
Texture formats:
SDL_PIXELFORMAT_ARGB8888
SDL_PIXELFORMAT_YV12
SDL_PIXELFORMAT_IYUV
使用 SDL_PIXELFORMAT_ARGB8888 时,我得到约 1 ms/frame:
// g++ main.cpp `pkg-config --cflags --libs sdl2`
#include <SDL.h>
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <chrono>
// Counts frames and periodically reports the average frame rate.
//
// Call exactly once per rendered frame. Every call increments a frame counter;
// once more than `period` seconds have elapsed since the last report (or since
// the first call), a summary line is written to `os` and the counter and the
// reference time are reset.
//
// os      stream that receives the report (defaults to std::cout)
// period  minimum number of seconds between two reports
//
// The stream's formatting flags and precision are restored after printing so
// the std::fixed / std::setprecision used here do not leak into unrelated
// output written to the same stream later.
//
// The counter and the reference time are function-local statics: all callers
// share one tally regardless of which stream they pass.
void PrintFrameTiming(std::ostream& os = std::cout, float period = 2.0f)
{
    static unsigned int frames = 0;
    frames++;

    // Initialised on the first call only; later calls measure against it.
    static auto start = std::chrono::steady_clock::now();
    auto end = std::chrono::steady_clock::now();

    float seconds = std::chrono::duration_cast< std::chrono::duration<float> >(end - start).count();
    if( seconds > period )
    {
        // `frames` is at least 1 here because it is incremented above.
        float spf = seconds / frames;

        // Remember the caller's formatting so it can be put back afterwards.
        const std::ios_base::fmtflags savedFlags = os.flags();
        const std::streamsize savedPrecision = os.precision();

        os
            << frames << " frames in "
            << std::setprecision(1) << std::fixed << seconds << " seconds = "
            << std::setprecision(1) << std::fixed << 1.0f / spf << " FPS ("
            << std::setprecision(3) << std::fixed << spf * 1000.0f << " ms/frame)\n";

        os.flags( savedFlags );
        os.precision( savedPrecision );

        // Start a new measurement window.
        frames = 0;
        start = end;
    }
}
int main( int, char** )
{
SDL_Init( SDL_INIT_EVERYTHING );
SDL_Window* window = SDL_CreateWindow( "SDL", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 600, 600, SDL_WINDOW_SHOWN );
SDL_Renderer* renderer = SDL_CreateRenderer( window, -1, SDL_RENDERER_ACCELERATED );
SDL_SetHint( SDL_HINT_RENDER_SCALE_QUALITY, "1" );
// dump renderer info
SDL_RendererInfo info;
SDL_GetRendererInfo( renderer, &info );
std::cout << "Renderer name: " << info.name << '\n';
std::cout << "Texture formats: " << '\n';
for( Uint32 i = 0; i < info.num_texture_formats; i++ )
{
std::cout << SDL_GetPixelFormatName( info.texture_formats[i] ) << '\n';
}
// create texture
const unsigned int texWidth = 1024;
const unsigned int texHeight = 1024;
SDL_Texture* texture = SDL_CreateTexture( renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, texWidth, texHeight );
std::vector< unsigned char > pixels( texWidth * texHeight * 4, 0 );
bool useLocktexture = false;
// main loop
bool running = true;
while( running )
{
SDL_SetRenderDrawColor( renderer, 0, 0, 0, SDL_ALPHA_OPAQUE );
SDL_RenderClear( renderer );
// handle events
SDL_Event ev;
while( SDL_PollEvent( &ev ) )
{
if( ( SDL_QUIT == ev.type ) ||
( SDL_KEYDOWN == ev.type && SDL_SCANCODE_ESCAPE == ev.key.keysym.scancode ) )
{
running = false;
break;
}
if( SDL_KEYDOWN == ev.type && SDL_SCANCODE_L == ev.key.keysym.scancode )
{
useLocktexture = !useLocktexture;
std::cout << "Using " << ( useLocktexture ? "SDL_LockTexture() + std::copy_n()" : "SDL_UpdateTexture()" ) << '\n';
}
}
// splat down some random pixels
for( unsigned int i = 0; i < 1000; i++ )
{
const unsigned int x = rand() % texWidth;
const unsigned int y = rand() % texHeight;
const unsigned int offset = ( texWidth * y * 4 ) + x * 4;
pixels[ offset + 0 ] = rand() % 256; // b
pixels[ offset + 1 ] = rand() % 256; // g
pixels[ offset + 2 ] = rand() % 256; // r
pixels[ offset + 3 ] = SDL_ALPHA_OPAQUE; // a
}
// update texture
if( useLocktexture )
{
unsigned char* lockedPixels = nullptr;
int pitch = 0;
SDL_LockTexture( texture, nullptr, reinterpret_cast< void** >( &lockedPixels ), &pitch );
std::copy_n( pixels.data(), pixels.size(), lockedPixels );
SDL_UnlockTexture( texture );
}
else
{
SDL_UpdateTexture( texture, nullptr, pixels.data(), texWidth * 4 );
}
SDL_RenderCopy( renderer, texture, nullptr, nullptr );
SDL_RenderPresent( renderer );
PrintFrameTiming();
}
SDL_DestroyRenderer( renderer );
SDL_DestroyWindow( window );
SDL_Quit();
return 0;
}
确保你没有启用 vsync(例如在驱动程序中强制启用、正在运行合成器等),否则你所有的帧时间都会是约 16 ms(或者你的显示器刷新率所对应的时间)。