OSDN Git Service

improve GLES jumptables
authorMathias Agopian <mathias@google.com>
Sat, 15 Jun 2013 02:08:36 +0000 (19:08 -0700)
committerMathias Agopian <mathias@google.com>
Mon, 1 Jul 2013 22:52:47 +0000 (15:52 -0700)
in the common case this saves one instructions per jump
(which will help with the i-cache).

this change also gets rid of the "use slow tls" option,
which was useless. So at least now architectures that don't have
assembly bindings will perform much better.

Change-Id: I31be6c06ad2136b50ef3a1ac14682d7812ad40d2

opengl/libs/EGL/egl.cpp
opengl/libs/EGL/eglApi.cpp
opengl/libs/EGL/getProcAddress.cpp
opengl/libs/GLES2/gl2.cpp
opengl/libs/GLES_CM/gl.cpp
opengl/libs/hooks.h

index 6ac8724..86637dc 100644 (file)
@@ -230,9 +230,6 @@ static int gl_no_context() {
 
 static void early_egl_init(void)
 {
-#if !USE_FAST_TLS_KEY
-    pthread_key_create(&gGLWrapperKey, NULL);
-#endif
 #if EGL_TRACE
     pthread_key_create(&gGLTraceKey, NULL);
     initEglTraceLevel();
@@ -341,42 +338,11 @@ void gl_noop() {
 
 // ----------------------------------------------------------------------------
 
-#if USE_FAST_TLS_KEY
-
-// We have a dedicated TLS slot in bionic
-static inline gl_hooks_t const * volatile * get_tls_hooks() {
-    volatile void *tls_base = __get_tls();
-    gl_hooks_t const * volatile * tls_hooks =
-            reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
-    return tls_hooks;
-}
-
 void setGlThreadSpecific(gl_hooks_t const *value) {
     gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
     tls_hooks[TLS_SLOT_OPENGL_API] = value;
 }
 
-gl_hooks_t const* getGlThreadSpecific() {
-    gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
-    gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
-    if (hooks) return hooks;
-    return &gHooksNoContext;
-}
-
-#else
-
-void setGlThreadSpecific(gl_hooks_t const *value) {
-    pthread_setspecific(gGLWrapperKey, value);
-}
-
-gl_hooks_t const* getGlThreadSpecific() {
-    gl_hooks_t const* hooks =  static_cast<gl_hooks_t*>(pthread_getspecific(gGLWrapperKey));
-    if (hooks) return hooks;
-    return &gHooksNoContext;
-}
-
-#endif
-
 // ----------------------------------------------------------------------------
 // GL / EGL hooks
 // ----------------------------------------------------------------------------
index 2bc9851..0358fcc 100644 (file)
@@ -849,9 +849,7 @@ __eglMustCastToProperFunctionPointerType eglGetProcAddress(const char *procname)
             }
 
             if (found) {
-#if USE_FAST_TLS_KEY
                 addr = gExtensionForwarders[slot];
-#endif
                 sGLExtentionMap.add(name, addr);
                 sGLExtentionSlot++;
             }
index c160aa0..add2a79 100644 (file)
@@ -34,9 +34,7 @@ namespace android {
 #undef GL_EXTENSION_LIST
 #undef GET_TLS
 
-#if USE_FAST_TLS_KEY
-
-    #if defined(__arm__)
+#if defined(__arm__)
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
@@ -58,7 +56,7 @@ namespace android {
             :                                                   \
             );
 
-    #elif defined(__mips__)
+#elif defined(__mips__)
 
         #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -88,27 +86,21 @@ namespace android {
                                           ext.extensions[_api]))    \
                 :                                                   \
             );
+#endif
 
-    #else
-        #error Unsupported architecture
-    #endif
-
+#if defined(CALL_GL_EXTENSION_API)
     #define GL_EXTENSION_NAME(_n)   __glExtFwd##_n
 
     #define GL_EXTENSION(_n)                         \
         void API_ENTRY(GL_EXTENSION_NAME(_n))() {    \
             CALL_GL_EXTENSION_API(_n);               \
         }
-
-
 #else
+        #define GL_EXTENSION_NAME(_n) NULL
 
-    #define GL_EXTENSION_NAME(_n) NULL
-
-    #define GL_EXTENSION(_n)
-
-    #warning "eglGetProcAddress() partially supported"
+        #define GL_EXTENSION(_n)
 
+        #warning "eglGetProcAddress() partially supported"
 #endif
 
 
index fad2176..3134e56 100644 (file)
@@ -40,13 +40,11 @@ using namespace android;
 #undef CALL_GL_API
 #undef CALL_GL_API_RETURN
 
-#if USE_FAST_TLS_KEY
-
-  #if defined(__arm__)
+#if defined(__arm__) && !USE_SLOW_BINDING
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
-    #define API_ENTRY(_api) __attribute__((naked)) _api
+    #define API_ENTRY(_api) __attribute__((noinline)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
@@ -54,15 +52,13 @@ using namespace android;
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
-            "mov   r0, #0             \n"                       \
-            "bx    lr                 \n"                       \
             :                                                   \
             : [tls] "J"(TLS_SLOT_OPENGL_API*4),                 \
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
 
-  #elif defined(__mips__)
+#elif defined(__mips__) && !USE_SLOW_BINDING
 
     #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -94,30 +90,21 @@ using namespace android;
             :                                                    \
             );
 
-  #else
-
-    #error Unsupported architecture
-
-  #endif
-
-    #define CALL_GL_API_RETURN(_api, ...) \
-        CALL_GL_API(_api, __VA_ARGS__) \
-        return 0; // placate gcc's warnings. never reached.
-
 #else
 
     #define API_ENTRY(_api) _api
 
     #define CALL_GL_API(_api, ...)                                       \
         gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        _c->_api(__VA_ARGS__);
-
-    #define CALL_GL_API_RETURN(_api, ...)                                \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        return _c->_api(__VA_ARGS__)
+        if (_c) return _c->_api(__VA_ARGS__);
 
 #endif
 
+#define CALL_GL_API_RETURN(_api, ...) \
+    CALL_GL_API(_api, __VA_ARGS__) \
+    return 0;
+
+
 
 extern "C" {
 #include "gl3_api.in"
@@ -139,7 +126,8 @@ const GLubyte * glGetString(GLenum name)
 {
     const GLubyte * ret = egl_get_string_for_current_context(name);
     if (ret == NULL) {
-        ret = __glGetString(name);
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
+        ret = _c->glGetString(name);
     }
     return ret;
 }
index a5bbdc6..18ef6f9 100644 (file)
@@ -31,9 +31,6 @@
 
 using namespace android;
 
-// set this to 1 for crude GL debugging
-#define CHECK_FOR_GL_ERRORS     0
-
 // ----------------------------------------------------------------------------
 // extensions for the framework
 // ----------------------------------------------------------------------------
@@ -95,13 +92,11 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
 #undef CALL_GL_API
 #undef CALL_GL_API_RETURN
 
-#if USE_FAST_TLS_KEY && !CHECK_FOR_GL_ERRORS
-
-  #if defined(__arm__)
+#if defined(__arm__) && !USE_SLOW_BINDING
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
-    #define API_ENTRY(_api) __attribute__((naked)) _api
+    #define API_ENTRY(_api) __attribute__((noinline)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
@@ -109,15 +104,13 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
-            "mov   r0, #0             \n"                       \
-            "bx    lr                 \n"                       \
             :                                                   \
             : [tls] "J"(TLS_SLOT_OPENGL_API*4),                 \
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
 
-  #elif defined(__mips__)
+#elif defined(__mips__) && !USE_SLOW_BINDING
 
     #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -149,43 +142,20 @@ GL_API void GL_APIENTRY glWeightPointerOESBounds(GLint size, GLenum type,
             :                                                    \
             );
 
-  #else
-    #error Unsupported architecture
-  #endif
-
-    #define CALL_GL_API_RETURN(_api, ...) \
-        CALL_GL_API(_api, __VA_ARGS__) \
-        return 0; // placate gcc's warnings. never reached.
-
 #else
 
-    #if CHECK_FOR_GL_ERRORS
-    
-        #define CHECK_GL_ERRORS(_api) \
-            do { GLint err = glGetError(); \
-                ALOGE_IF(err != GL_NO_ERROR, "%s failed (0x%04X)", #_api, err); \
-            } while(false);
-
-    #else
-
-        #define CHECK_GL_ERRORS(_api) do { } while(false);
-
-    #endif
-
-
     #define API_ENTRY(_api) _api
 
-    #define CALL_GL_API(_api, ...)                                      \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
-        _c->_api(__VA_ARGS__);                                          \
-        CHECK_GL_ERRORS(_api)
-
-    #define CALL_GL_API_RETURN(_api, ...)                               \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
-        return _c->_api(__VA_ARGS__)
+    #define CALL_GL_API(_api, ...)                                       \
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
+        if (_c) return _c->_api(__VA_ARGS__);
 
 #endif
 
+#define CALL_GL_API_RETURN(_api, ...) \
+    CALL_GL_API(_api, __VA_ARGS__) \
+    return 0;
+
 
 extern "C" {
 #include "gl_api.in"
@@ -202,11 +172,11 @@ extern "C" {
 
 extern "C" const GLubyte * __glGetString(GLenum name);
 
-const GLubyte * glGetString(GLenum name)
-{
+const GLubyte * glGetString(GLenum name) {
     const GLubyte * ret = egl_get_string_for_current_context(name);
     if (ret == NULL) {
-        ret = __glGetString(name);
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
+        ret = _c->glGetString(name);
     }
     return ret;
 }
index b2a684c..4b43198 100644 (file)
 #include <GLES3/gl3.h>
 #include <GLES3/gl3ext.h>
 
-#if !defined(__arm__) && !defined(__mips__)
-#define USE_SLOW_BINDING            1
-#else
-#define USE_SLOW_BINDING            0
-#endif
+// set to 1 for debugging
+#define USE_SLOW_BINDING    0
+
 #undef NELEM
-#define NELEM(x)                    (sizeof(x)/sizeof(*(x)))
+#define NELEM(x)            (sizeof(x)/sizeof(*(x)))
 
 // maximum number of GL extensions that can be used simultaneously in
 // a given process. this limitation exists because we need to have
 #define MAX_NUMBER_OF_GL_EXTENSIONS 256
 
 
-#if defined(HAVE_ANDROID_OS) && !USE_SLOW_BINDING && __OPTIMIZE__
-#define USE_FAST_TLS_KEY            1
-#else
-#define USE_FAST_TLS_KEY            0
-#endif
-
-#if USE_FAST_TLS_KEY
-#   include <bionic_tls.h>  /* special private C library header */
-#endif
+#include <bionic_tls.h>  /* special private C library header */
 
 // ----------------------------------------------------------------------------
 namespace android {
@@ -84,7 +74,20 @@ struct gl_hooks_t {
 #undef EGL_ENTRY
 
 EGLAPI void setGlThreadSpecific(gl_hooks_t const *value);
-EGLAPI gl_hooks_t const* getGlThreadSpecific();
+
+// We have a dedicated TLS slot in bionic
+inline gl_hooks_t const * volatile * get_tls_hooks() {
+    volatile void *tls_base = __get_tls();
+    gl_hooks_t const * volatile * tls_hooks =
+            reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
+    return tls_hooks;
+}
+
+inline EGLAPI gl_hooks_t const* getGlThreadSpecific() {
+    gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
+    gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
+    return hooks;
+}
 
 // ----------------------------------------------------------------------------
 }; // namespace android