From 45b0b8bff8fe4e48200027ac047da6b2f8c32335 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Fri, 27 Mar 2020 09:19:37 +0000 Subject: [PATCH 1/7] GLES2 2d Batch rendering (across items) 2d rendering is currently bottlenecked by drawing primitives one at a time, limiting OpenGL efficiency. This PR batches primitives and renders in fewer drawcalls, resulting in significant performance improvements. This also speeds up text rendering. This PR batches across canvas items as well as within items. The code dynamically chooses between a vertex format with and without color, depending on the input data for a frame, in order to optimize throughput and maximize batch size. It also adds an option to use glScissor to reduce fillrate in light passes. --- doc/classes/ProjectSettings.xml | 21 + drivers/gles2/rasterizer_array_gles2.h | 190 + .../gles2/rasterizer_canvas_base_gles2.cpp | 1010 +++++ drivers/gles2/rasterizer_canvas_base_gles2.h | 140 + drivers/gles2/rasterizer_canvas_gles2.cpp | 3733 ++++++++++------- drivers/gles2/rasterizer_canvas_gles2.h | 312 +- servers/visual_server.cpp | 13 + 7 files changed, 3737 insertions(+), 1682 deletions(-) create mode 100644 drivers/gles2/rasterizer_array_gles2.h create mode 100644 drivers/gles2/rasterizer_canvas_base_gles2.cpp create mode 100644 drivers/gles2/rasterizer_canvas_base_gles2.h diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index e9aa79742dae..713103a29f9c 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -969,6 +969,27 @@ [Environment] that will be used as a fallback environment in case a scene does not specify its own environment. The default environment is loaded in at scene load time regardless of whether you have set an environment or not. If you do not rely on the fallback environment, it is best to delete [code]default_env.tres[/code], or to specify a different default environment here. + + Size of buffer reserved for batched vertices. 
Larger size enables larger batches, but there are diminishing returns for the memory used. + + + Including color in the vertex format has a cost; however, not including color prevents batching across color changes. This threshold determines the ratio of [code]number of vertex color changes / total number of vertices[/code] above which vertices will be translated to colored format. A value of 0 will always use colored vertices, 1 will never use colored vertices. + + + Sets the proportion of the screen area that must be saved by a scissor operation in order to activate light scissoring. This can prevent parts of items being rendered outside the light area. Lower values scissor more aggressively. A value of 1 scissors none of the items, a value of 0 scissors every item. This can reduce fill rate requirements in scenes with a lot of lighting. + + + Sets the number of commands to look ahead to determine whether to batch render items. A value of 1 can join items consisting of single commands, 0 turns off joining. Higher values are in theory more likely to join; however, this has diminishing returns and a runtime cost, so a small value is recommended. + + + Turns batching on and off. Batching increases performance by reducing the number of graphics API draw calls. + + + [b]Experimental[/b] For regression testing against the old renderer. If this is switched on, and [code]use_batching[/code] is set, the renderer will swap alternately between using the old renderer, and the batched renderer, on each frame. This makes it easy to identify visual differences. Performance will be degraded. + + + [b]Experimental[/b] Switches on batching within the editor. Use with caution - note that if your editor does not render correctly you may need to edit your [code]project.godot[/code] and remove the use_batching_in_editor setting manually. + Max buffer size for blend shapes. Any blend shape bigger than this will not work. 
diff --git a/drivers/gles2/rasterizer_array_gles2.h b/drivers/gles2/rasterizer_array_gles2.h new file mode 100644 index 000000000000..30a4dd823a97 --- /dev/null +++ b/drivers/gles2/rasterizer_array_gles2.h @@ -0,0 +1,190 @@ +#pragma once + +/*************************************************************************/ +/* rasterizer_array_gles2.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
+/*************************************************************************/
+
+/**
+ * Fast single-threaded growable array for POD types (elements are copied with memcpy).
+ * For use in render drivers, not for general use.
+ * TO BE REPLACED by local_vector.
+*/
+
+#include "core/os/memory.h"
+#include "core/vector.h"
+
+#include <string.h> // memcpy
+
+template <class T>
+class RasterizerArrayGLES2 {
+public:
+	RasterizerArrayGLES2() {
+		_list = 0;
+		_size = 0;
+		_max_size = 0;
+	}
+	~RasterizerArrayGLES2() { free(); }
+
+	_FORCE_INLINE_ T &operator[](unsigned int ui) { return _list[ui]; } // no bounds checking
+	_FORCE_INLINE_ const T &operator[](unsigned int ui) const { return _list[ui]; }
+
+	void free() { // releases the storage; size and capacity both become 0
+		if (_list) {
+			memdelete_arr(_list);
+			_list = 0;
+		}
+		_size = 0;
+		_max_size = 0;
+	}
+
+	void create(int p_size) { // drops any previous storage, then reserves p_size elements with size() == 0
+		free();
+		if (p_size) {
+			_list = memnew_arr(T, p_size);
+		}
+		_size = 0;
+		_max_size = p_size;
+	}
+
+	_FORCE_INLINE_ void reset() { _size = 0; } // forgets the contents, keeps the allocation
+
+	_FORCE_INLINE_ T *request_with_grow() { // like request(), but grows the storage until a slot is free
+		T *p = request();
+		if (!p) {
+			grow();
+			return request_with_grow();
+		}
+		return p;
+	}
+
+	// none of that inefficient pass by value stuff here, thanks (returns the next free slot, or 0 when full)
+	_FORCE_INLINE_ T *request() {
+		if (_size < _max_size) {
+			return &_list[_size++];
+		}
+		return 0;
+	}
+
+	// several items at a time (returns the first of p_num_items slots, or 0 if they do not fit)
+	_FORCE_INLINE_ T *request(int p_num_items) {
+		int old_size = _size;
+		_size += p_num_items;
+
+		if (_size <= _max_size) {
+			return &_list[old_size];
+		}
+
+		// revert
+		_size = old_size;
+		return 0;
+	}
+
+	_FORCE_INLINE_ int size() const { return _size; }
+	_FORCE_INLINE_ int max_size() const { return _max_size; }
+	_FORCE_INLINE_ const T *get_data() const { return _list; }
+
+	bool copy_from(const RasterizerArrayGLES2 &o) {
+		// no resizing done here, it should be done manually
+		if (o.size() > _max_size)
+			return false;
+
+		// pod types only please!
+		memcpy(_list, o.get_data(), o.size() * sizeof(T));
+		_size = o.size();
+		return true;
+	}
+
+	// if you want this to be cheap, call reset before grow,
+	// to ensure there is no data to copy
+	void grow() {
+		unsigned int new_max_size = _max_size * 2; // capacity doubles on every call
+		if (!new_max_size)
+			new_max_size = 1;
+
+		T *new_list = memnew_arr(T, new_max_size);
+
+		// copy .. pod types only
+		if (_list) {
+			memcpy(new_list, _list, _size * sizeof(T));
+		}
+
+		unsigned int new_size = size(); // free() below zeroes _size, so remember it first
+		free();
+		_list = new_list;
+		_size = new_size;
+		_max_size = new_max_size;
+	}
+
+private:
+	T *_list;
+	int _size;
+	int _max_size;
+};
+
+template <class T>
+class RasterizerArray_non_pod_GLES2 { // same interface idea, but stored in a Vector and assigned with Vector::set
+public:
+	RasterizerArray_non_pod_GLES2() {
+		_size = 0;
+	}
+
+	const T &operator[](unsigned int ui) const { return _list[ui]; }
+
+	void create(int p_size) {
+		_list.resize(p_size);
+		_size = 0;
+	}
+	void reset() { _size = 0; }
+
+	void push_back(const T &val) { // grows the Vector (doubling) until there is room, then stores val
+		while (true) {
+			if (_size < max_size()) {
+				_list.set(_size, val);
+				_size++;
+				return;
+			}
+
+			grow();
+		}
+	}
+
+	int size() const { return _size; }
+	int max_size() const { return _list.size(); }
+
+private:
+	void grow() {
+		unsigned int new_max_size = _list.size() * 2;
+		if (!new_max_size)
+			new_max_size = 1;
+		_list.resize(new_max_size);
+	}
+
+	Vector<T> _list;
+	int _size;
+};
diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.cpp b/drivers/gles2/rasterizer_canvas_base_gles2.cpp
new file mode 100644
index 000000000000..86a109417a6c
--- /dev/null
+++ b/drivers/gles2/rasterizer_canvas_base_gles2.cpp
@@ -0,0 +1,1010 @@
+/*************************************************************************/
+/* rasterizer_canvas_base_gles2.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. 
*/ +/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "rasterizer_canvas_base_gles2.h" + +#include "core/os/os.h" +#include "core/project_settings.h" +#include "rasterizer_scene_gles2.h" +#include "servers/visual/visual_server_raster.h" + +#ifndef GLES_OVER_GL +#define glClearDepth glClearDepthf +#endif + +RID RasterizerCanvasBaseGLES2::light_internal_create() { + + return RID(); +} + +void RasterizerCanvasBaseGLES2::light_internal_update(RID p_rid, Light *p_light) { +} + +void RasterizerCanvasBaseGLES2::light_internal_free(RID p_rid) { +} + +void RasterizerCanvasBaseGLES2::canvas_begin() { + + state.canvas_shader.bind(); + state.using_transparent_rt = false; + int viewport_x, viewport_y, viewport_width, viewport_height; + + if (storage->frame.current_rt) { + glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); + state.using_transparent_rt = storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]; + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { + // set Viewport and Scissor when rendering directly to screen + viewport_width = storage->frame.current_rt->width; + viewport_height = storage->frame.current_rt->height; + viewport_x = storage->frame.current_rt->x; + viewport_y = OS::get_singleton()->get_window_size().height - viewport_height - storage->frame.current_rt->y; + glScissor(viewport_x, viewport_y, viewport_width, viewport_height); + glViewport(viewport_x, viewport_y, viewport_width, viewport_height); + glEnable(GL_SCISSOR_TEST); + } + } + + if (storage->frame.clear_request) { + glClearColor(storage->frame.clear_request_color.r, + storage->frame.clear_request_color.g, + storage->frame.clear_request_color.b, + state.using_transparent_rt ? 
storage->frame.clear_request_color.a : 1.0); + glClear(GL_COLOR_BUFFER_BIT); + storage->frame.clear_request = false; + } + + /* + if (storage->frame.current_rt) { + glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); + glColorMask(1, 1, 1, 1); + } + */ + + reset_canvas(); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + glDisableVertexAttribArray(VS::ARRAY_COLOR); + + // set up default uniforms + + Transform canvas_transform; + + if (storage->frame.current_rt) { + + float csy = 1.0; + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { + csy = -1.0; + } + canvas_transform.translate(-(storage->frame.current_rt->width / 2.0f), -(storage->frame.current_rt->height / 2.0f), 0.0f); + canvas_transform.scale(Vector3(2.0f / storage->frame.current_rt->width, csy * -2.0f / storage->frame.current_rt->height, 1.0f)); + } else { + Vector2 ssize = OS::get_singleton()->get_window_size(); + canvas_transform.translate(-(ssize.width / 2.0f), -(ssize.height / 2.0f), 0.0f); + canvas_transform.scale(Vector3(2.0f / ssize.width, -2.0f / ssize.height, 1.0f)); + } + + state.uniforms.projection_matrix = canvas_transform; + + state.uniforms.final_modulate = Color(1, 1, 1, 1); + + state.uniforms.modelview_matrix = Transform2D(); + state.uniforms.extra_matrix = Transform2D(); + + _set_uniforms(); + _bind_quad_buffer(); +} + +void RasterizerCanvasBaseGLES2::canvas_end() { + + glBindBuffer(GL_ARRAY_BUFFER, 0); + + for (int i = 0; i < VS::ARRAY_MAX; i++) { + glDisableVertexAttribArray(i); + } + + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { + //reset viewport to full window size + int viewport_width = OS::get_singleton()->get_window_size().width; + int viewport_height = OS::get_singleton()->get_window_size().height; + glViewport(0, 0, viewport_width, 
viewport_height); + glScissor(0, 0, viewport_width, viewport_height); + } + + state.using_texture_rect = false; + state.using_skeleton = false; + state.using_ninepatch = false; + state.using_transparent_rt = false; +} + +void RasterizerCanvasBaseGLES2::draw_generic_textured_rect(const Rect2 &p_rect, const Rect2 &p_src) { + + state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y)); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(p_src.position.x, p_src.position.y, p_src.size.x, p_src.size.y)); + + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); +} + +RasterizerStorageGLES2::Texture *RasterizerCanvasBaseGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) { + + RasterizerStorageGLES2::Texture *tex_return = NULL; + + if (p_texture.is_valid()) { + + RasterizerStorageGLES2::Texture *texture = storage->texture_owner.getornull(p_texture); + + if (!texture) { + state.current_tex = RID(); + state.current_tex_ptr = NULL; + + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + + } else { + + if (texture->redraw_if_visible) { + VisualServerRaster::redraw_request(); + } + + texture = texture->get_ptr(); + + if (texture->render_target) { + texture->render_target->used_in_frame = true; + } + + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); + glBindTexture(GL_TEXTURE_2D, texture->tex_id); + + state.current_tex = p_texture; + state.current_tex_ptr = texture; + + tex_return = texture; + } + } else { + state.current_tex = RID(); + state.current_tex_ptr = NULL; + + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + } + + if (p_normal_map == state.current_normal) { + //do none + state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, 
state.current_normal.is_valid()); + + } else if (p_normal_map.is_valid()) { + + RasterizerStorageGLES2::Texture *normal_map = storage->texture_owner.getornull(p_normal_map); + + if (!normal_map) { + state.current_normal = RID(); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); + glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); + state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, false); + + } else { + + if (normal_map->redraw_if_visible) { //check before proxy, because this is usually used with proxies + VisualServerRaster::redraw_request(); + } + + normal_map = normal_map->get_ptr(); + + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); + glBindTexture(GL_TEXTURE_2D, normal_map->tex_id); + state.current_normal = p_normal_map; + state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, true); + } + + } else { + + state.current_normal = RID(); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); + glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); + state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, false); + } + + return tex_return; +} + +void RasterizerCanvasBaseGLES2::draw_window_margins(int *black_margin, RID *black_image) { + + Vector2 window_size = OS::get_singleton()->get_window_size(); + int window_h = window_size.height; + int window_w = window_size.width; + + glBindFramebuffer(GL_FRAMEBUFFER, storage->system_fbo); + glViewport(0, 0, window_size.width, window_size.height); + canvas_begin(); + + if (black_image[MARGIN_LEFT].is_valid()) { + _bind_canvas_texture(black_image[MARGIN_LEFT], RID()); + Size2 sz(storage->texture_get_width(black_image[MARGIN_LEFT]), storage->texture_get_height(black_image[MARGIN_LEFT])); + draw_generic_textured_rect(Rect2(0, 0, black_margin[MARGIN_LEFT], window_h), Rect2(0, 0, sz.x, sz.y)); + } else if (black_margin[MARGIN_LEFT]) { + glActiveTexture(GL_TEXTURE0); + 
glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + + draw_generic_textured_rect(Rect2(0, 0, black_margin[MARGIN_LEFT], window_h), Rect2(0, 0, 1, 1)); + } + + if (black_image[MARGIN_RIGHT].is_valid()) { + _bind_canvas_texture(black_image[MARGIN_RIGHT], RID()); + Size2 sz(storage->texture_get_width(black_image[MARGIN_RIGHT]), storage->texture_get_height(black_image[MARGIN_RIGHT])); + draw_generic_textured_rect(Rect2(window_w - black_margin[MARGIN_RIGHT], 0, black_margin[MARGIN_RIGHT], window_h), Rect2(0, 0, sz.x, sz.y)); + } else if (black_margin[MARGIN_RIGHT]) { + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + + draw_generic_textured_rect(Rect2(window_w - black_margin[MARGIN_RIGHT], 0, black_margin[MARGIN_RIGHT], window_h), Rect2(0, 0, 1, 1)); + } + + if (black_image[MARGIN_TOP].is_valid()) { + _bind_canvas_texture(black_image[MARGIN_TOP], RID()); + + Size2 sz(storage->texture_get_width(black_image[MARGIN_TOP]), storage->texture_get_height(black_image[MARGIN_TOP])); + draw_generic_textured_rect(Rect2(0, 0, window_w, black_margin[MARGIN_TOP]), Rect2(0, 0, sz.x, sz.y)); + + } else if (black_margin[MARGIN_TOP]) { + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + + draw_generic_textured_rect(Rect2(0, 0, window_w, black_margin[MARGIN_TOP]), Rect2(0, 0, 1, 1)); + } + + if (black_image[MARGIN_BOTTOM].is_valid()) { + + _bind_canvas_texture(black_image[MARGIN_BOTTOM], RID()); + + Size2 sz(storage->texture_get_width(black_image[MARGIN_BOTTOM]), storage->texture_get_height(black_image[MARGIN_BOTTOM])); + draw_generic_textured_rect(Rect2(0, window_h - black_margin[MARGIN_BOTTOM], window_w, black_margin[MARGIN_BOTTOM]), Rect2(0, 0, sz.x, sz.y)); + + } else if (black_margin[MARGIN_BOTTOM]) { + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + + draw_generic_textured_rect(Rect2(0, window_h - black_margin[MARGIN_BOTTOM], window_w, 
black_margin[MARGIN_BOTTOM]), Rect2(0, 0, 1, 1)); + } + + canvas_end(); +} + +void RasterizerCanvasBaseGLES2::_bind_quad_buffer() { + glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, 0, NULL); +} + +void RasterizerCanvasBaseGLES2::_set_uniforms() { + + state.canvas_shader.set_uniform(CanvasShaderGLES2::PROJECTION_MATRIX, state.uniforms.projection_matrix); + state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, state.uniforms.modelview_matrix); + state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX, state.uniforms.extra_matrix); + + state.canvas_shader.set_uniform(CanvasShaderGLES2::FINAL_MODULATE, state.uniforms.final_modulate); + + state.canvas_shader.set_uniform(CanvasShaderGLES2::TIME, storage->frame.time[0]); + + if (storage->frame.current_rt) { + Vector2 screen_pixel_size; + screen_pixel_size.x = 1.0 / storage->frame.current_rt->width; + screen_pixel_size.y = 1.0 / storage->frame.current_rt->height; + + state.canvas_shader.set_uniform(CanvasShaderGLES2::SCREEN_PIXEL_SIZE, screen_pixel_size); + } + + if (state.using_skeleton) { + state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TRANSFORM, state.skeleton_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TRANSFORM_INVERSE, state.skeleton_transform_inverse); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TEXTURE_SIZE, state.skeleton_texture_size); + } + + if (state.using_light) { + + Light *light = state.using_light; + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_MATRIX, light->light_shader_xform); + Transform2D basis_inverse = light->light_shader_xform.affine_inverse().orthonormalized(); + basis_inverse[2] = Vector2(); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_MATRIX_INVERSE, basis_inverse); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_LOCAL_MATRIX, 
light->xform_cache.affine_inverse()); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_COLOR, light->color * light->energy); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_POS, light->light_shader_pos); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_HEIGHT, light->height); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_OUTSIDE_ALPHA, light->mode == VS::CANVAS_LIGHT_MODE_MASK ? 1.0 : 0.0); + + if (state.using_shadow) { + RasterizerStorageGLES2::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(light->shadow_buffer); + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 5); + glBindTexture(GL_TEXTURE_2D, cls->distance); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_MATRIX, light->shadow_matrix_cache); + state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_SHADOW_COLOR, light->shadow_color); + + state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOWPIXEL_SIZE, (1.0 / light->shadow_buffer_size) * (1.0 + light->shadow_smooth)); + if (light->radius_cache == 0) { + state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_GRADIENT, 0.0); + } else { + state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_GRADIENT, light->shadow_gradient_length / (light->radius_cache * 1.1)); + } + state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_DISTANCE_MULT, light->radius_cache * 1.1); + + /*canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_MATRIX,light->shadow_matrix_cache); + canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_ESM_MULTIPLIER,light->shadow_esm_mult); + canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_SHADOW_COLOR,light->shadow_color);*/ + } + } +} + +void RasterizerCanvasBaseGLES2::reset_canvas() { + + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_DITHER); + glEnable(GL_BLEND); + + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + 
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + // bind the back buffer to a texture so shaders can use it. + // It should probably use texture unit -3 (as GLES2 does as well) but currently that's buggy. + // keeping this for now as there's nothing else that uses texture unit 2 + // TODO ^ + if (storage->frame.current_rt) { + // glActiveTexture(GL_TEXTURE0 + 2); + // glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->copy_screen_effect.color); + } + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::canvas_debug_viewport_shadows(Light *p_lights_with_shadow) { +} + +void RasterizerCanvasBaseGLES2::_copy_texscreen(const Rect2 &p_rect) { + + state.canvas_texscreen_used = true; + + _copy_screen(p_rect); + + // back to canvas, force rebind + state.using_texture_rect = false; + state.canvas_shader.bind(); + _bind_canvas_texture(state.current_tex, state.current_normal); + _set_uniforms(); +} + +void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights, const int *p_bones) { + + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + + uint32_t buffer_ofs = 0; + + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); + buffer_ofs += sizeof(Vector2) * p_vertex_count; + + if (p_singlecolor) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + Color m = *p_colors; + 
glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); + } else if (!p_colors) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + } else { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); + glEnableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(Color) * p_vertex_count; + } + + if (p_uvs) { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(Vector2) * p_vertex_count; + } else { + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + if (p_weights && p_bones) { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(float) * 4 * p_vertex_count, p_weights); + glEnableVertexAttribArray(VS::ARRAY_WEIGHTS); + glVertexAttribPointer(VS::ARRAY_WEIGHTS, 4, GL_FLOAT, GL_FALSE, sizeof(float) * 4, CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(float) * 4 * p_vertex_count; + + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(int) * 4 * p_vertex_count, p_bones); + glEnableVertexAttribArray(VS::ARRAY_BONES); + glVertexAttribPointer(VS::ARRAY_BONES, 4, GL_UNSIGNED_INT, GL_FALSE, sizeof(int) * 4, CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(int) * 4 * p_vertex_count; + + } else { + glDisableVertexAttribArray(VS::ARRAY_WEIGHTS); + glDisableVertexAttribArray(VS::ARRAY_BONES); + } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + + if (storage->config.support_32_bits_indices) { //should check for + 
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); + } else { + uint16_t *index16 = (uint16_t *)alloca(sizeof(uint16_t) * p_index_count); + for (int i = 0; i < p_index_count; i++) { + index16[i] = uint16_t(p_indices[i]); + } + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); + glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 0); + } + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { + + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + + uint32_t buffer_ofs = 0; + + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); + buffer_ofs += sizeof(Vector2) * p_vertex_count; + + if (p_singlecolor) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + Color m = *p_colors; + glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); + } else if (!p_colors) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + } else { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); + glEnableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(Color) * p_vertex_count; + } + + if (p_uvs) { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); 
+ glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + } else { + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + glDrawArrays(p_primitive, 0, p_vertex_count); + + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { + + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + + uint32_t buffer_ofs = 0; + + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); + buffer_ofs += sizeof(Vector2) * p_vertex_count; + + if (p_singlecolor) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + Color m = *p_colors; + glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); + } else if (!p_colors) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + } else { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); + glEnableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(Color) * p_vertex_count; + } + + if (p_uvs) { + glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); + buffer_ofs += sizeof(Vector2) * 
p_vertex_count; + } else { + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + + if (storage->config.support_32_bits_indices) { //should check for + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); + } else { + uint16_t *index16 = (uint16_t *)alloca(sizeof(uint16_t) * p_index_count); + for (int i = 0; i < p_index_count; i++) { + index16[i] = uint16_t(p_indices[i]); + } + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); + glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_SHORT, 0); + } + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs) { + + static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN }; + + int color_offset = 0; + int uv_offset = 0; + int stride = 2; + + if (p_colors) { + color_offset = stride; + stride += 4; + } + + if (p_uvs) { + uv_offset = stride; + stride += 2; + } + + float buffer_data[(2 + 2 + 4) * 4]; + + for (int i = 0; i < p_points; i++) { + buffer_data[stride * i + 0] = p_vertices[i].x; + buffer_data[stride * i + 1] = p_vertices[i].y; + } + + if (p_colors) { + for (int i = 0; i < p_points; i++) { + buffer_data[stride * i + color_offset + 0] = p_colors[i].r; + buffer_data[stride * i + color_offset + 1] = p_colors[i].g; + buffer_data[stride * i + color_offset + 2] = p_colors[i].b; + buffer_data[stride * i + color_offset + 3] = p_colors[i].a; + } + } + + if (p_uvs) { + for (int i = 0; i < p_points; i++) { + 
buffer_data[stride * i + uv_offset + 0] = p_uvs[i].x; + buffer_data[stride * i + uv_offset + 1] = p_uvs[i].y; + } + } + + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); +#ifndef GLES_OVER_GL + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); +#endif + glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), buffer_data); + + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL); + + if (p_colors) { + glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_offset * sizeof(float))); + glEnableVertexAttribArray(VS::ARRAY_COLOR); + } + + if (p_uvs) { + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(uv_offset * sizeof(float))); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + } + + glDrawArrays(prim[p_points], 0, p_points); + + glBindBuffer(GL_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::_copy_screen(const Rect2 &p_rect) { + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { + ERR_PRINT_ONCE("Cannot use screen texture copying in render target set to render direct to screen."); + return; + } + + ERR_FAIL_COND_MSG(storage->frame.current_rt->copy_screen_effect.color == 0, "Can't use screen texture copying in a render target configured without copy buffers."); + + glDisable(GL_BLEND); + + Vector2 wh(storage->frame.current_rt->width, storage->frame.current_rt->height); + + Color copy_section(p_rect.position.x / wh.x, p_rect.position.y / wh.y, p_rect.size.x / wh.x, p_rect.size.y / wh.y); + + if (p_rect != Rect2()) { + storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_COPY_SECTION, true); + } + + storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_NO_ALPHA, !state.using_transparent_rt); + + glBindFramebuffer(GL_FRAMEBUFFER, 
storage->frame.current_rt->copy_screen_effect.fbo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->color); + + storage->shaders.copy.bind(); + storage->shaders.copy.set_uniform(CopyShaderGLES2::COPY_SECTION, copy_section); + + const Vector2 vertpos[4] = { + Vector2(-1, -1), + Vector2(-1, 1), + Vector2(1, 1), + Vector2(1, -1), + }; + + const Vector2 uvpos[4] = { + Vector2(0, 0), + Vector2(0, 1), + Vector2(1, 1), + Vector2(1, 0) + }; + + const int indexpos[6] = { + 0, 1, 2, + 2, 3, 0 + }; + + _draw_polygon(indexpos, 6, 4, vertpos, uvpos, NULL, false); + + storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_COPY_SECTION, false); + storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_NO_ALPHA, false); + + glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); //back to front + glEnable(GL_BLEND); +} + +void RasterizerCanvasBaseGLES2::canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, CameraMatrix *p_xform_cache) { + + RasterizerStorageGLES2::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(p_buffer); + ERR_FAIL_COND(!cls); + + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_DITHER); + glDisable(GL_CULL_FACE); + glDepthFunc(GL_LEQUAL); + glEnable(GL_DEPTH_TEST); + glDepthMask(true); + + glBindFramebuffer(GL_FRAMEBUFFER, cls->fbo); + + state.canvas_shadow_shader.set_conditional(CanvasShadowShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows); + state.canvas_shadow_shader.bind(); + + glViewport(0, 0, cls->size, cls->height); + glClearDepth(1.0f); + glClearColor(1, 1, 1, 1); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + VS::CanvasOccluderPolygonCullMode cull = VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + + for (int i = 0; i < 4; i++) { + + //make sure it remains orthogonal, makes easy to read angle later + + Transform light; + 
light.origin[0] = p_light_xform[2][0]; + light.origin[1] = p_light_xform[2][1]; + light.basis[0][0] = p_light_xform[0][0]; + light.basis[0][1] = p_light_xform[1][0]; + light.basis[1][0] = p_light_xform[0][1]; + light.basis[1][1] = p_light_xform[1][1]; + + //light.basis.scale(Vector3(to_light.elements[0].length(),to_light.elements[1].length(),1)); + + //p_near=1; + CameraMatrix projection; + { + real_t fov = 90; + real_t nearp = p_near; + real_t farp = p_far; + real_t aspect = 1.0; + + real_t ymax = nearp * Math::tan(Math::deg2rad(fov * 0.5)); + real_t ymin = -ymax; + real_t xmin = ymin * aspect; + real_t xmax = ymax * aspect; + + projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp); + } + + Vector3 cam_target = Basis(Vector3(0, 0, Math_PI * 2 * (i / 4.0))).xform(Vector3(0, 1, 0)); + projection = projection * CameraMatrix(Transform().looking_at(cam_target, Vector3(0, 0, -1)).affine_inverse()); + + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::PROJECTION_MATRIX, projection); + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::LIGHT_MATRIX, light); + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::DISTANCE_NORM, 1.0 / p_far); + + if (i == 0) + *p_xform_cache = projection; + + glViewport(0, (cls->height / 4) * i, cls->size, cls->height / 4); + + LightOccluderInstance *instance = p_occluders; + + while (instance) { + + RasterizerStorageGLES2::CanvasOccluder *cc = storage->canvas_occluder_owner.getornull(instance->polygon_buffer); + if (!cc || cc->len == 0 || !(p_light_mask & instance->light_mask)) { + + instance = instance->next; + continue; + } + + state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::WORLD_MATRIX, instance->xform_cache); + + VS::CanvasOccluderPolygonCullMode transformed_cull_cache = instance->cull_cache; + + if (transformed_cull_cache != VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED && + (p_light_xform.basis_determinant() * instance->xform_cache.basis_determinant()) < 0) { + 
transformed_cull_cache = + transformed_cull_cache == VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE ? + VS::CANVAS_OCCLUDER_POLYGON_CULL_COUNTER_CLOCKWISE : + VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE; + } + + if (cull != transformed_cull_cache) { + + cull = transformed_cull_cache; + switch (cull) { + case VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED: { + + glDisable(GL_CULL_FACE); + + } break; + case VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE: { + + glEnable(GL_CULL_FACE); + glCullFace(GL_FRONT); + } break; + case VS::CANVAS_OCCLUDER_POLYGON_CULL_COUNTER_CLOCKWISE: { + + glEnable(GL_CULL_FACE); + glCullFace(GL_BACK); + + } break; + } + } + + glBindBuffer(GL_ARRAY_BUFFER, cc->vertex_id); + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glVertexAttribPointer(VS::ARRAY_VERTEX, 3, GL_FLOAT, false, 0, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cc->index_id); + + glDrawElements(GL_TRIANGLES, cc->len * 3, GL_UNSIGNED_SHORT, 0); + + instance = instance->next; + } + } + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); +} + +void RasterizerCanvasBaseGLES2::draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample) { + Vector2 half_size; + if (storage->frame.current_rt) { + half_size = Vector2(storage->frame.current_rt->width, storage->frame.current_rt->height); + } else { + half_size = OS::get_singleton()->get_window_size(); + } + half_size *= 0.5; + Vector2 offset((p_rect.position.x - half_size.x) / half_size.x, (p_rect.position.y - half_size.y) / half_size.y); + Vector2 scale(p_rect.size.x / half_size.x, p_rect.size.y / half_size.y); + + float aspect_ratio = p_rect.size.x / p_rect.size.y; + + // setup our lens shader + state.lens_shader.bind(); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::OFFSET, offset); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::SCALE, scale); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::K1, p_k1); + 
state.lens_shader.set_uniform(LensDistortedShaderGLES2::K2, p_k2); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::EYE_CENTER, p_eye_center); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::UPSCALE, p_oversample); + state.lens_shader.set_uniform(LensDistortedShaderGLES2::ASPECT_RATIO, aspect_ratio); + + // bind our quad buffer + _bind_quad_buffer(); + + // and draw + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + + // and cleanup + glBindBuffer(GL_ARRAY_BUFFER, 0); + + for (int i = 0; i < VS::ARRAY_MAX; i++) { + glDisableVertexAttribArray(i); + } +} + +void RasterizerCanvasBaseGLES2::initialize() { + + // quad buffer + { + glGenBuffers(1, &data.canvas_quad_vertices); + glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices); + + const float qv[8] = { + 0, 0, + 0, 1, + 1, 1, + 1, 0 + }; + + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 8, qv, GL_STATIC_DRAW); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + // polygon buffer + { + uint32_t poly_size = GLOBAL_DEF("rendering/limits/buffers/canvas_polygon_buffer_size_kb", 128); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/limits/buffers/canvas_polygon_buffer_size_kb", PropertyInfo(Variant::INT, "rendering/limits/buffers/canvas_polygon_buffer_size_kb", PROPERTY_HINT_RANGE, "0,256,1,or_greater")); + poly_size *= 1024; + poly_size = MAX(poly_size, (2 + 2 + 4) * 4 * sizeof(float)); + glGenBuffers(1, &data.polygon_buffer); + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); + glBufferData(GL_ARRAY_BUFFER, poly_size, NULL, GL_DYNAMIC_DRAW); + + data.polygon_buffer_size = poly_size; + + glBindBuffer(GL_ARRAY_BUFFER, 0); + + uint32_t index_size = GLOBAL_DEF("rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", 128); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", PropertyInfo(Variant::INT, "rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", PROPERTY_HINT_RANGE, "0,256,1,or_greater")); + 
index_size *= 1024; // kb + glGenBuffers(1, &data.polygon_index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, index_size, NULL, GL_DYNAMIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + data.polygon_index_buffer_size = index_size; + } + + // ninepatch buffers + { + // array buffer + glGenBuffers(1, &data.ninepatch_vertices); + glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); + + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, NULL, GL_DYNAMIC_DRAW); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + + // element buffer + glGenBuffers(1, &data.ninepatch_elements); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); + +#define _EIDX(y, x) (y * 4 + x) + uint8_t elems[3 * 2 * 9] = { + + // first row + + _EIDX(0, 0), _EIDX(0, 1), _EIDX(1, 1), + _EIDX(1, 1), _EIDX(1, 0), _EIDX(0, 0), + + _EIDX(0, 1), _EIDX(0, 2), _EIDX(1, 2), + _EIDX(1, 2), _EIDX(1, 1), _EIDX(0, 1), + + _EIDX(0, 2), _EIDX(0, 3), _EIDX(1, 3), + _EIDX(1, 3), _EIDX(1, 2), _EIDX(0, 2), + + // second row + + _EIDX(1, 0), _EIDX(1, 1), _EIDX(2, 1), + _EIDX(2, 1), _EIDX(2, 0), _EIDX(1, 0), + + // the center one would be here, but we'll put it at the end + // so it's easier to disable the center and be able to use + // one draw call for both + + _EIDX(1, 2), _EIDX(1, 3), _EIDX(2, 3), + _EIDX(2, 3), _EIDX(2, 2), _EIDX(1, 2), + + // third row + + _EIDX(2, 0), _EIDX(2, 1), _EIDX(3, 1), + _EIDX(3, 1), _EIDX(3, 0), _EIDX(2, 0), + + _EIDX(2, 1), _EIDX(2, 2), _EIDX(3, 2), + _EIDX(3, 2), _EIDX(3, 1), _EIDX(2, 1), + + _EIDX(2, 2), _EIDX(2, 3), _EIDX(3, 3), + _EIDX(3, 3), _EIDX(3, 2), _EIDX(2, 2), + + // center field + + _EIDX(1, 1), _EIDX(1, 2), _EIDX(2, 2), + _EIDX(2, 2), _EIDX(2, 1), _EIDX(1, 1) + }; +#undef _EIDX + + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(elems), elems, GL_STATIC_DRAW); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + + state.canvas_shadow_shader.init(); + + state.canvas_shader.init(); + + 
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows); + + state.canvas_shader.bind(); + + state.lens_shader.init(); + + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP, GLOBAL_DEF("rendering/quality/2d/use_pixel_snap", false)); + + state.using_light = NULL; + state.using_transparent_rt = false; + state.using_skeleton = false; +} + +void RasterizerCanvasBaseGLES2::finalize() { +} + +RasterizerCanvasBaseGLES2::RasterizerCanvasBaseGLES2() { +#ifdef GLES_OVER_GL + use_nvidia_rect_workaround = GLOBAL_GET("rendering/quality/2d/gles2_use_nvidia_rect_flicker_workaround"); +#else + // Not needed (a priori) on GLES devices + use_nvidia_rect_workaround = false; +#endif +} diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.h b/drivers/gles2/rasterizer_canvas_base_gles2.h new file mode 100644 index 000000000000..32d55bc3eee5 --- /dev/null +++ b/drivers/gles2/rasterizer_canvas_base_gles2.h @@ -0,0 +1,140 @@ +/*************************************************************************/ +/* rasterizer_canvas_base_gles2.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#ifndef RASTERIZERCANVASBASEGLES2_H +#define RASTERIZERCANVASBASEGLES2_H + +#include "rasterizer_array_gles2.h" +#include "rasterizer_storage_gles2.h" +#include "servers/visual/rasterizer.h" + +#include "shaders/canvas.glsl.gen.h" +#include "shaders/lens_distorted.glsl.gen.h" + +#include "shaders/canvas_shadow.glsl.gen.h" + +class RasterizerCanvasBaseGLES2 : public RasterizerCanvas { +public: + enum { + INSTANCE_ATTRIB_BASE = 8, + }; + + struct Uniforms { + Transform projection_matrix; + + Transform2D modelview_matrix; + Transform2D extra_matrix; + + Color final_modulate; + + float time; + }; + + struct Data { + GLuint canvas_quad_vertices; + GLuint polygon_buffer; + GLuint polygon_index_buffer; + + uint32_t polygon_buffer_size; + uint32_t polygon_index_buffer_size; + + GLuint ninepatch_vertices; + GLuint ninepatch_elements; + } data; + + struct State { + Uniforms uniforms; + bool canvas_texscreen_used; + CanvasShaderGLES2 canvas_shader; + CanvasShadowShaderGLES2 canvas_shadow_shader; + LensDistortedShaderGLES2 lens_shader; + + bool using_texture_rect; + bool using_ninepatch; + bool using_skeleton; + + Transform2D skeleton_transform; + Transform2D skeleton_transform_inverse; + Size2i skeleton_texture_size; + + RID current_tex; + RID current_normal; + RasterizerStorageGLES2::Texture *current_tex_ptr; + + Transform vp; + Light *using_light; + bool using_shadow; + bool using_transparent_rt; + + } state; + + typedef void Texture; + + RasterizerSceneGLES2 *scene_render; + + RasterizerStorageGLES2 *storage; + + bool use_nvidia_rect_workaround; + + void _set_uniforms(); + + virtual RID light_internal_create(); + virtual void light_internal_update(RID p_rid, Light *p_light); + virtual void light_internal_free(RID p_rid); + + virtual void canvas_begin(); + virtual void canvas_end(); + + void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 
*p_uvs); + void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL); + void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); + void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); + + void _bind_quad_buffer(); + void _copy_texscreen(const Rect2 &p_rect); + void _copy_screen(const Rect2 &p_rect); + + virtual void draw_window_margins(int *black_margin, RID *black_image); + void draw_generic_textured_rect(const Rect2 &p_rect, const Rect2 &p_src); + void draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample); + + virtual void reset_canvas(); + virtual void canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, CameraMatrix *p_xform_cache); + virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow); + + RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map); + + void initialize(); + void finalize(); + + RasterizerCanvasBaseGLES2(); +}; + +#endif // RASTERIZERCANVASBASEGLES2_H diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 373d3989ce45..dc2885796c07 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -35,1403 +35,1549 @@ #include "rasterizer_scene_gles2.h" #include "servers/visual/visual_server_raster.h" -#ifndef GLES_OVER_GL -#define glClearDepth glClearDepthf -#endif - -RID 
RasterizerCanvasGLES2::light_internal_create() { +static const GLenum gl_primitive[] = { + GL_POINTS, + GL_LINES, + GL_LINE_STRIP, + GL_LINE_LOOP, + GL_TRIANGLES, + GL_TRIANGLE_STRIP, + GL_TRIANGLE_FAN +}; - return RID(); +RasterizerCanvasGLES2::BatchData::BatchData() { + reset_flush(); + gl_vertex_buffer = 0; + gl_index_buffer = 0; + max_quads = 0; + vertex_buffer_size_units = 0; + vertex_buffer_size_bytes = 0; + index_buffer_size_units = 0; + index_buffer_size_bytes = 0; + use_colored_vertices = false; + settings_use_batching = false; + settings_max_join_item_commands = 0; + settings_colored_vertex_format_threshold = 0.0f; + settings_batch_buffer_num_verts = 0; + scissor_threshold_area = 0.0f; + + settings_use_batching_original_choice = false; + settings_flash_batching = false; + settings_scissor_lights = false; + settings_scissor_threshold = -1.0f; } -void RasterizerCanvasGLES2::light_internal_update(RID p_rid, Light *p_light) { +RasterizerCanvasGLES2::RenderItemState::RenderItemState() { + current_clip = NULL; + shader_cache = NULL; + rebind_shader = true; + prev_use_skeleton = false; + last_blend_mode = -1; + canvas_last_material = RID(); + item_group_z = 0; + item_group_light = 0; + final_modulate = Color(-1.0, -1.0, -1.0, -1.0); // just something unlikely } -void RasterizerCanvasGLES2::light_internal_free(RID p_rid) { -} +RasterizerStorageGLES2::Texture *RasterizerCanvasGLES2::_get_canvas_texture(const RID &p_texture) const { + if (p_texture.is_valid()) { + + RasterizerStorageGLES2::Texture *texture = storage->texture_owner.getornull(p_texture); -void RasterizerCanvasGLES2::_set_uniforms() { + if (texture) { + return texture->get_ptr(); + } + } - state.canvas_shader.set_uniform(CanvasShaderGLES2::PROJECTION_MATRIX, state.uniforms.projection_matrix); - state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, state.uniforms.modelview_matrix); - state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX, state.uniforms.extra_matrix); + return 
0; +} - state.canvas_shader.set_uniform(CanvasShaderGLES2::FINAL_MODULATE, state.uniforms.final_modulate); +int RasterizerCanvasGLES2::_batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match) { - state.canvas_shader.set_uniform(CanvasShaderGLES2::TIME, storage->frame.time[0]); + // optimization .. in 99% cases the last matched value will be the same, so no need to traverse the list + if (p_previous_match > 0) // if it is zero, it will get hit first in the linear search anyway + { + const BatchTex &batch_texture = bdata.batch_textures[p_previous_match]; - if (storage->frame.current_rt) { - Vector2 screen_pixel_size; - screen_pixel_size.x = 1.0 / storage->frame.current_rt->width; - screen_pixel_size.y = 1.0 / storage->frame.current_rt->height; + // note for future reference, if RID implementation changes, this could become more expensive + if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) { + // tiling mode must also match + bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF; - state.canvas_shader.set_uniform(CanvasShaderGLES2::SCREEN_PIXEL_SIZE, screen_pixel_size); + if (tiles == p_tile) + // match! + return p_previous_match; + } } - if (state.using_skeleton) { - state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TRANSFORM, state.skeleton_transform); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TRANSFORM_INVERSE, state.skeleton_transform_inverse); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SKELETON_TEXTURE_SIZE, state.skeleton_texture_size); - } + // not the previous match .. we will do a linear search ... slower, but should happen + // not very often except with non-batchable runs, which are going to be slow anyway + // n.b. 
could possibly be replaced later by a fast hash table + for (int n = 0; n < bdata.batch_textures.size(); n++) { + const BatchTex &batch_texture = bdata.batch_textures[n]; + if ((batch_texture.RID_texture == p_texture) && (batch_texture.RID_normal == p_normal)) { - if (state.using_light) { - - Light *light = state.using_light; - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_MATRIX, light->light_shader_xform); - Transform2D basis_inverse = light->light_shader_xform.affine_inverse().orthonormalized(); - basis_inverse[2] = Vector2(); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_MATRIX_INVERSE, basis_inverse); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_LOCAL_MATRIX, light->xform_cache.affine_inverse()); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_COLOR, light->color * light->energy); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_POS, light->light_shader_pos); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_HEIGHT, light->height); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_OUTSIDE_ALPHA, light->mode == VS::CANVAS_LIGHT_MODE_MASK ? 
1.0 : 0.0); - - if (state.using_shadow) { - RasterizerStorageGLES2::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(light->shadow_buffer); - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 5); - glBindTexture(GL_TEXTURE_2D, cls->distance); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_MATRIX, light->shadow_matrix_cache); - state.canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_SHADOW_COLOR, light->shadow_color); - - state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOWPIXEL_SIZE, (1.0 / light->shadow_buffer_size) * (1.0 + light->shadow_smooth)); - if (light->radius_cache == 0) { - state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_GRADIENT, 0.0); - } else { - state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_GRADIENT, light->shadow_gradient_length / (light->radius_cache * 1.1)); - } - state.canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_DISTANCE_MULT, light->radius_cache * 1.1); + // tiling mode must also match + bool tiles = batch_texture.tile_mode != BatchTex::TILE_OFF; - /*canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_MATRIX,light->shadow_matrix_cache); - canvas_shader.set_uniform(CanvasShaderGLES2::SHADOW_ESM_MULTIPLIER,light->shadow_esm_mult); - canvas_shader.set_uniform(CanvasShaderGLES2::LIGHT_SHADOW_COLOR,light->shadow_color);*/ + if (tiles == p_tile) + // match! + return n; } } -} -void RasterizerCanvasGLES2::canvas_begin() { + // pushing back from local variable .. 
not ideal but has to use a Vector because non pod + // due to RIDs + BatchTex new_batch_tex; + new_batch_tex.RID_texture = p_texture; + new_batch_tex.RID_normal = p_normal; - state.canvas_shader.bind(); - state.using_transparent_rt = false; - int viewport_x, viewport_y, viewport_width, viewport_height; + // get the texture + RasterizerStorageGLES2::Texture *texture = _get_canvas_texture(p_texture); - if (storage->frame.current_rt) { - glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); - state.using_transparent_rt = storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]; - - if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { - // set Viewport and Scissor when rendering directly to screen - viewport_width = storage->frame.current_rt->width; - viewport_height = storage->frame.current_rt->height; - viewport_x = storage->frame.current_rt->x; - viewport_y = OS::get_singleton()->get_window_size().height - viewport_height - storage->frame.current_rt->y; - glScissor(viewport_x, viewport_y, viewport_width, viewport_height); - glViewport(viewport_x, viewport_y, viewport_width, viewport_height); - glEnable(GL_SCISSOR_TEST); - } + if (texture) { + new_batch_tex.tex_pixel_size.x = 1.0 / texture->width; + new_batch_tex.tex_pixel_size.y = 1.0 / texture->height; + } else { + // maybe doesn't need doing... + new_batch_tex.tex_pixel_size.x = 1.0; + new_batch_tex.tex_pixel_size.y = 1.0; } - if (storage->frame.clear_request) { - glClearColor(storage->frame.clear_request_color.r, - storage->frame.clear_request_color.g, - storage->frame.clear_request_color.b, - state.using_transparent_rt ? 
storage->frame.clear_request_color.a : 1.0); - glClear(GL_COLOR_BUFFER_BIT); - storage->frame.clear_request = false; - } + if (p_tile) { + if (texture) { + // default + new_batch_tex.tile_mode = BatchTex::TILE_NORMAL; - /* - if (storage->frame.current_rt) { - glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); - glColorMask(1, 1, 1, 1); + // no hardware support for non power of 2 tiling + if (!storage->config.support_npot_repeat_mipmap) { + if (next_power_of_2(texture->alloc_width) != (unsigned int)texture->alloc_width && next_power_of_2(texture->alloc_height) != (unsigned int)texture->alloc_height) { + new_batch_tex.tile_mode = BatchTex::TILE_FORCE_REPEAT; + } + } + } else { + // this should not happen? + new_batch_tex.tile_mode = BatchTex::TILE_OFF; + } + } else { + new_batch_tex.tile_mode = BatchTex::TILE_OFF; } - */ - reset_canvas(); + // push back + bdata.batch_textures.push_back(new_batch_tex); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + return bdata.batch_textures.size() - 1; +} - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - glDisableVertexAttribArray(VS::ARRAY_COLOR); +void RasterizerCanvasGLES2::_batch_upload_buffers() { - // set up default uniforms + // noop? 
+ if (!bdata.vertices.size()) + return; - Transform canvas_transform; + glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); - if (storage->frame.current_rt) { + // orphan the old (for now) + glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); - float csy = 1.0; - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) { - csy = -1.0; - } - canvas_transform.translate(-(storage->frame.current_rt->width / 2.0f), -(storage->frame.current_rt->height / 2.0f), 0.0f); - canvas_transform.scale(Vector3(2.0f / storage->frame.current_rt->width, csy * -2.0f / storage->frame.current_rt->height, 1.0f)); + if (!bdata.use_colored_vertices) { + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); } else { - Vector2 ssize = OS::get_singleton()->get_window_size(); - canvas_transform.translate(-(ssize.width / 2.0f), -(ssize.height / 2.0f), 0.0f); - canvas_transform.scale(Vector3(2.0f / ssize.width, -2.0f / ssize.height, 1.0f)); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.vertices_colored.size(), bdata.vertices_colored.get_data(), GL_DYNAMIC_DRAW); } - state.uniforms.projection_matrix = canvas_transform; - - state.uniforms.final_modulate = Color(1, 1, 1, 1); - - state.uniforms.modelview_matrix = Transform2D(); - state.uniforms.extra_matrix = Transform2D(); - - _set_uniforms(); - _bind_quad_buffer(); + // might not be necessary + glBindBuffer(GL_ARRAY_BUFFER, 0); } -void RasterizerCanvasGLES2::canvas_end() { +RasterizerCanvasGLES2::Batch *RasterizerCanvasGLES2::_batch_request_new(bool p_blank) { + Batch *batch = bdata.batches.request(); + if (!batch) { + // grow the batches + bdata.batches.grow(); - glBindBuffer(GL_ARRAY_BUFFER, 0); + // and the temporary batches (used for color verts) + bdata.batches_temp.reset(); + bdata.batches_temp.grow(); - for (int i = 0; i < VS::ARRAY_MAX; i++) { - glDisableVertexAttribArray(i); + // this should always 
succeed after growing + batch = bdata.batches.request(); +#ifdef DEBUG_ENABLED + CRASH_COND(!batch); +#endif } - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { - //reset viewport to full window size - int viewport_width = OS::get_singleton()->get_window_size().width; - int viewport_height = OS::get_singleton()->get_window_size().height; - glViewport(0, 0, viewport_width, viewport_height); - glScissor(0, 0, viewport_width, viewport_height); - } + if (p_blank) + memset(batch, 0, sizeof(Batch)); - state.using_texture_rect = false; - state.using_skeleton = false; - state.using_ninepatch = false; - state.using_transparent_rt = false; + return batch; } -RasterizerStorageGLES2::Texture *RasterizerCanvasGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) { +// This function may be called MULTIPLE TIMES for each item, so needs to record how far it has got +bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { + // we will prefill batches and vertices ready for sending in one go to the vertex buffer + int command_count = p_item->commands.size(); + Item::Command *const *commands = p_item->commands.ptr(); - RasterizerStorageGLES2::Texture *tex_return = NULL; + Transform2D transform; + TransformMode transform_mode = _find_transform_mode(r_fill_state.use_hardware_transform, p_item->final_transform, transform); - if (p_texture.is_valid()) { + Vector2 texpixel_size = r_fill_state.texpixel_size; - RasterizerStorageGLES2::Texture *texture = storage->texture_owner.getornull(p_texture); + // start batch is a dummy batch (tex id -1) .. 
could be made more efficient + if (!r_fill_state.curr_batch) { + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = r_command_start; + // should tex_id be set to -1? check this + } - if (!texture) { - state.current_tex = RID(); - state.current_tex_ptr = NULL; + // we need to return which command we got up to, so + // store this outside the loop + int command_num; - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); - glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + // do as many commands as possible until the vertex buffer will be full up + for (command_num = r_command_start; command_num < command_count; command_num++) { - } else { + Item::Command *command = commands[command_num]; - if (texture->redraw_if_visible) { - VisualServerRaster::redraw_request(); - } + switch (command->type) { - texture = texture->get_ptr(); + default: { + if (r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { + // another default command, just add to the existing batch + r_fill_state.curr_batch->num_commands++; + } else { + // end of previous different type batch, so start new default batch + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = command_num; + r_fill_state.curr_batch->num_commands = 1; + } + } break; + case Item::Command::TYPE_RECT: { - if (texture->render_target) { - texture->render_target->used_in_frame = true; - } + Item::CommandRect *rect = static_cast(command); - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); - glBindTexture(GL_TEXTURE_2D, texture->tex_id); + const Color &col = rect->modulate; - state.current_tex = p_texture; - state.current_tex_ptr = texture; + // instead of doing all the texture preparation for EVERY rect, + // we build a list of texture combinations and do this once off. 
+ // This means we have a potentially rather slow step to identify which texture combo + // using the RIDs. + int old_batch_tex_id = r_fill_state.batch_tex_id; + r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id); - tex_return = texture; - } - } else { - state.current_tex = RID(); - state.current_tex_ptr = NULL; + // try to create vertices BEFORE creating a batch, + // because if the vertex buffer is full, we need to finish this + // function, draw what we have so far, and then start a new set of batches - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 1); - glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); - } + // request FOUR vertices at a time, this is more efficient + BatchVertex *bvs = bdata.vertices.request(4); + if (!bvs) { + // run out of space in the vertex buffer .. finish this function and draw what we have so far + // return where we got to + r_command_start = command_num; + return true; + } - if (p_normal_map == state.current_normal) { - //do none - state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, state.current_normal.is_valid()); + bool change_batch = false; - } else if (p_normal_map.is_valid()) { + // conditions for creating a new batch + if ((r_fill_state.curr_batch->type != Batch::BT_RECT) || (old_batch_tex_id != r_fill_state.batch_tex_id)) { + change_batch = true; + } - RasterizerStorageGLES2::Texture *normal_map = storage->texture_owner.getornull(p_normal_map); + // we need to treat color change separately because we need to count these + // to decide whether to switch on the fly to colored vertices. 
+ if (!r_fill_state.curr_batch->color.equals(col)) { + change_batch = true; + bdata.total_color_changes++; + } - if (!normal_map) { - state.current_normal = RID(); - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); - glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); - state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, false); + if (change_batch) { + // put the tex pixel size in a local (less verbose and can be a register) + bdata.batch_textures[r_fill_state.batch_tex_id].tex_pixel_size.to(texpixel_size); - } else { + // need to preserve texpixel_size between items + r_fill_state.texpixel_size = texpixel_size; - if (normal_map->redraw_if_visible) { //check before proxy, because this is usually used with proxies - VisualServerRaster::redraw_request(); - } + // open new batch (this should never fail, it dynamically grows) + r_fill_state.curr_batch = _batch_request_new(false); - normal_map = normal_map->get_ptr(); + r_fill_state.curr_batch->type = Batch::BT_RECT; + r_fill_state.curr_batch->color.set(col); + r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id; + r_fill_state.curr_batch->first_command = command_num; + r_fill_state.curr_batch->num_commands = 1; + r_fill_state.curr_batch->first_quad = bdata.total_quads; + } else { + // we could alternatively do the count when closing a batch .. 
perhaps more efficient + r_fill_state.curr_batch->num_commands++; + } - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); - glBindTexture(GL_TEXTURE_2D, normal_map->tex_id); - state.current_normal = p_normal_map; - state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, true); - } + // fill the quad geometry + Vector2 mins = rect->rect.position; - } else { + if (transform_mode == TM_TRANSLATE) { + _software_transform_vertex(mins, transform); + } - state.current_normal = RID(); - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 2); - glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); - state.canvas_shader.set_uniform(CanvasShaderGLES2::USE_DEFAULT_NORMAL, false); - } + Vector2 maxs = mins + rect->rect.size; - return tex_return; -} + // just aliases + BatchVertex *bA = &bvs[0]; + BatchVertex *bB = &bvs[1]; + BatchVertex *bC = &bvs[2]; + BatchVertex *bD = &bvs[3]; -void RasterizerCanvasGLES2::_draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights, const int *p_bones) { + bA->pos.x = mins.x; + bA->pos.y = mins.y; - glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif + bB->pos.x = maxs.x; + bB->pos.y = mins.y; - uint32_t buffer_ofs = 0; + bC->pos.x = maxs.x; + bC->pos.y = maxs.y; - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); - buffer_ofs += sizeof(Vector2) * p_vertex_count; + bD->pos.x = mins.x; + bD->pos.y = maxs.y; - if (p_singlecolor) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - Color m = 
*p_colors; - glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); - } else if (!p_colors) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - } else { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); - glEnableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(Color) * p_vertex_count; - } + if (rect->rect.size.x < 0) { + SWAP(bA->pos, bB->pos); + SWAP(bC->pos, bD->pos); + } + if (rect->rect.size.y < 0) { + SWAP(bA->pos, bD->pos); + SWAP(bB->pos, bC->pos); + } - if (p_uvs) { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); - glEnableVertexAttribArray(VS::ARRAY_TEX_UV); - glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(Vector2) * p_vertex_count; - } else { - glDisableVertexAttribArray(VS::ARRAY_TEX_UV); - } + if (transform_mode == TM_ALL) { + _software_transform_vertex(bA->pos, transform); + _software_transform_vertex(bB->pos, transform); + _software_transform_vertex(bC->pos, transform); + _software_transform_vertex(bD->pos, transform); + } - if (p_weights && p_bones) { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(float) * 4 * p_vertex_count, p_weights); - glEnableVertexAttribArray(VS::ARRAY_WEIGHTS); - glVertexAttribPointer(VS::ARRAY_WEIGHTS, 4, GL_FLOAT, GL_FALSE, sizeof(float) * 4, CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(float) * 4 * p_vertex_count; + // uvs + Rect2 src_rect = (rect->flags & CANVAS_RECT_REGION) ? 
Rect2(rect->source.position * texpixel_size, rect->source.size * texpixel_size) : Rect2(0, 0, 1, 1); - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(int) * 4 * p_vertex_count, p_bones); - glEnableVertexAttribArray(VS::ARRAY_BONES); - glVertexAttribPointer(VS::ARRAY_BONES, 4, GL_UNSIGNED_INT, GL_FALSE, sizeof(int) * 4, CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(int) * 4 * p_vertex_count; + // 10% faster calculating the max first + Vector2 pos_max = src_rect.position + src_rect.size; + Vector2 uvs[4] = { + src_rect.position, + Vector2(pos_max.x, src_rect.position.y), + pos_max, + Vector2(src_rect.position.x, pos_max.y), + }; - } else { - glDisableVertexAttribArray(VS::ARRAY_WEIGHTS); - glDisableVertexAttribArray(VS::ARRAY_BONES); - } + if (rect->flags & CANVAS_RECT_TRANSPOSE) { + SWAP(uvs[1], uvs[3]); + } - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif + if (rect->flags & CANVAS_RECT_FLIP_H) { + SWAP(uvs[0], uvs[1]); + SWAP(uvs[2], uvs[3]); + } + if (rect->flags & CANVAS_RECT_FLIP_V) { + SWAP(uvs[0], uvs[3]); + SWAP(uvs[1], uvs[2]); + } - if (storage->config.support_32_bits_indices) { //should check for - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); - glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); - } else { - uint16_t *index16 = (uint16_t *)alloca(sizeof(uint16_t) * p_index_count); - for (int i = 0; i < p_index_count; i++) { - index16[i] = uint16_t(p_indices[i]); + bA->uv.set(uvs[0]); + bB->uv.set(uvs[1]); + bC->uv.set(uvs[2]); + bD->uv.set(uvs[3]); + + // increment quad count + bdata.total_quads++; + + } break; } - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); - glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 
0); } - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -} + // VERY IMPORTANT to return where we got to, because this func may be called multiple + // times per item. + // Don't miss out on this step by calling return earlier in the function without setting r_command_start. + r_command_start = command_num; -void RasterizerCanvasGLES2::_draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { + return false; +} - glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); +// convert the stupidly high amount of batches (each with its own color) +// to larger batches where the color is stored in the verts instead... +// There is a trade off. Non colored verts are smaller so work faster, but +// there comes a point where it is better to just use colored verts to avoid lots of +// batches. +void RasterizerCanvasGLES2::_batch_translate_to_colored() { + bdata.vertices_colored.reset(); + bdata.batches_temp.reset(); + + // As the vertices_colored and batches_temp are 'mirrors' of the non-colored version, + // the sizes should be equal, and allocations should never fail. Hence the use of debug + // asserts to check program flow, these should not occur at runtime unless the allocation + // code has been altered. 
+#ifdef DEBUG_ENABLED + CRASH_COND(bdata.vertices_colored.max_size() != bdata.vertices.max_size()); + CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size()); #endif - uint32_t buffer_ofs = 0; + Color curr_col(-1.0, -1.0, -1.0, -1.0); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); - buffer_ofs += sizeof(Vector2) * p_vertex_count; - - if (p_singlecolor) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - Color m = *p_colors; - glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); - } else if (!p_colors) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - } else { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); - glEnableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(Color) * p_vertex_count; - } + Batch *dest_batch = 0; - if (p_uvs) { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); - glEnableVertexAttribArray(VS::ARRAY_TEX_UV); - glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - } else { - glDisableVertexAttribArray(VS::ARRAY_TEX_UV); - } + // translate the batches into vertex colored batches + for (int n = 0; n < bdata.batches.size(); n++) { + const Batch &source_batch = bdata.batches[n]; - glDrawArrays(p_primitive, 0, p_vertex_count); + bool needs_new_batch; - glBindBuffer(GL_ARRAY_BUFFER, 0); -} + if (dest_batch) { + // is the dest batch the same except for the color? 
+ if ((dest_batch->batch_texture_id == source_batch.batch_texture_id) && (dest_batch->type == source_batch.type)) { + // add to previous batch + dest_batch->num_commands += source_batch.num_commands; + needs_new_batch = false; -void RasterizerCanvasGLES2::_draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { + // create the colored verts (only if not default) + if (source_batch.type != Batch::BT_DEFAULT) { + int first_vert = source_batch.first_quad * 4; + int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); - glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); + for (int v = first_vert; v < end_vert; v++) { + const BatchVertex &bv = bdata.vertices[v]; + BatchVertexColored *cv = bdata.vertices_colored.request(); +#ifdef DEBUG_ENABLED + CRASH_COND(!cv); #endif + cv->pos = bv.pos; + cv->uv = bv.uv; + cv->col = source_batch.color; + } + } + } else { + needs_new_batch = true; + } + } else { + needs_new_batch = true; + } - uint32_t buffer_ofs = 0; - - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); - buffer_ofs += sizeof(Vector2) * p_vertex_count; + if (needs_new_batch) { + dest_batch = bdata.batches_temp.request(); +#ifdef DEBUG_ENABLED + CRASH_COND(!dest_batch); +#endif - if (p_singlecolor) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - Color m = *p_colors; - glVertexAttrib4f(VS::ARRAY_COLOR, m.r, m.g, m.b, m.a); - } else if (!p_colors) { - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - } else { - 
glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Color) * p_vertex_count, p_colors); - glEnableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(Color), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(Color) * p_vertex_count; - } + *dest_batch = source_batch; - if (p_uvs) { - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_uvs); - glEnableVertexAttribArray(VS::ARRAY_TEX_UV); - glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); - buffer_ofs += sizeof(Vector2) * p_vertex_count; - } else { - glDisableVertexAttribArray(VS::ARRAY_TEX_UV); - } + // create the colored verts (only if not default) + if (source_batch.type != Batch::BT_DEFAULT) { + int first_vert = source_batch.first_quad * 4; + int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); + for (int v = first_vert; v < end_vert; v++) { + const BatchVertex &bv = bdata.vertices[v]; + BatchVertexColored *cv = bdata.vertices_colored.request(); +#ifdef DEBUG_ENABLED + CRASH_COND(!cv); #endif - - if (storage->config.support_32_bits_indices) { //should check for - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); - glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); - } else { - uint16_t *index16 = (uint16_t *)alloca(sizeof(uint16_t) * p_index_count); - for (int i = 0; i < p_index_count; i++) { - index16[i] = uint16_t(p_indices[i]); + cv->pos = bv.pos; + cv->uv = bv.uv; + cv->col = source_batch.color; + } + } } - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); - glDrawElements(p_primitive, 
p_index_count, GL_UNSIGNED_SHORT, 0); } - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + // copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner) + bdata.batches.copy_from(bdata.batches_temp); } -void RasterizerCanvasGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs) { - - static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN }; +void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material) { - int color_offset = 0; - int uv_offset = 0; - int stride = 2; - - if (p_colors) { - color_offset = stride; - stride += 4; - } + ERR_FAIL_COND(p_batch.num_commands <= 0); - if (p_uvs) { - uv_offset = stride; - stride += 2; + const bool &colored_verts = bdata.use_colored_vertices; + int sizeof_vert; + if (!colored_verts) { + sizeof_vert = sizeof(BatchVertex); + } else { + sizeof_vert = sizeof(BatchVertexColored); } - float buffer_data[(2 + 2 + 4) * 4]; + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - for (int i = 0; i < p_points; i++) { - buffer_data[stride * i + 0] = p_vertices[i].x; - buffer_data[stride * i + 1] = p_vertices[i].y; + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); } - if (p_colors) { - for (int i = 0; i < p_points; i++) { - buffer_data[stride * i + color_offset + 0] = p_colors[i].r; - buffer_data[stride * i + color_offset + 1] = p_colors[i].g; - buffer_data[stride * i + color_offset + 2] = p_colors[i].b; - buffer_data[stride * i + color_offset + 3] = p_colors[i].a; - } - } + // batch tex + const BatchTex &tex = bdata.batch_textures[p_batch.batch_texture_id]; - if (p_uvs) { - for (int i = 0; i < p_points; i++) { - buffer_data[stride * i + uv_offset + 0] = p_uvs[i].x; - buffer_data[stride * i + uv_offset + 1] = p_uvs[i].y; - } - } 
+ _bind_canvas_texture(tex.RID_texture, tex.RID_normal); - glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), buffer_data); + // bind the index and vertex buffer + glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, bdata.gl_index_buffer); + + uint64_t pointer = 0; + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof_vert, (const void *)pointer); - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL); + // always send UVs, even within a texture specified because a shader can still use UVs + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, sizeof_vert, CAST_INT_TO_UCHAR_PTR(pointer + (2 * 4))); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); - if (p_colors) { - glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_offset * sizeof(float))); + // color + if (!colored_verts) { + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4fv(VS::ARRAY_COLOR, p_batch.color.get_data()); + } else { + glVertexAttribPointer(VS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof_vert, CAST_INT_TO_UCHAR_PTR(pointer + (4 * 4))); glEnableVertexAttribArray(VS::ARRAY_COLOR); } - if (p_uvs) { - glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(uv_offset * sizeof(float))); - glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + switch (tex.tile_mode) { + case BatchTex::TILE_FORCE_REPEAT: { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, true); + } break; + case BatchTex::TILE_NORMAL: { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + 
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + } break; + default: { + } break; + } + + // we need to convert explicitly from pod Vec2 to Vector2 ... + // could use a cast but this might be unsafe in future + Vector2 tps; + tex.tex_pixel_size.to(tps); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, tps); + + int64_t offset = p_batch.first_quad * 6 * 2; // 6 inds per quad at 2 bytes each + + int num_elements = p_batch.num_commands * 6; + glDrawElements(GL_TRIANGLES, num_elements, GL_UNSIGNED_SHORT, (void *)offset); + + switch (tex.tile_mode) { + case BatchTex::TILE_FORCE_REPEAT: { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, false); + } break; + case BatchTex::TILE_NORMAL: { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } break; + default: { + } break; } - glDrawArrays(prim[p_points], 0, p_points); + glDisableVertexAttribArray(VS::ARRAY_TEX_UV); + glDisableVertexAttribArray(VS::ARRAY_COLOR); + // may not be necessary .. 
state change optimization still TODO glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } -static const GLenum gl_primitive[] = { - GL_POINTS, - GL_LINES, - GL_LINE_STRIP, - GL_LINE_LOOP, - GL_TRIANGLES, - GL_TRIANGLE_STRIP, - GL_TRIANGLE_FAN -}; +void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { -void RasterizerCanvasGLES2::_canvas_item_render_commands(Item *p_item, Item *current_clip, bool &reclip, RasterizerStorageGLES2::Material *p_material) { + int num_batches = bdata.batches.size(); - int command_count = p_item->commands.size(); - Item::Command **commands = p_item->commands.ptrw(); + for (int batch_num = 0; batch_num < num_batches; batch_num++) { + const Batch &batch = bdata.batches[batch_num]; - for (int i = 0; i < command_count; i++) { + switch (batch.type) { + case Batch::BT_RECT: { + _batch_render_rects(batch, p_material); + } break; + default: { + int end_command = batch.first_command + batch.num_commands; - Item::Command *command = commands[i]; + for (int i = batch.first_command; i < end_command; i++) { - switch (command->type) { + Item::Command *command = p_commands[i]; - case Item::Command::TYPE_LINE: { + switch (command->type) { - Item::CommandLine *line = static_cast(command); + case Item::Command::TYPE_LINE: { - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + Item::CommandLine *line = static_cast(command); - _bind_canvas_texture(RID(), RID()); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } + + _bind_canvas_texture(RID(), RID()); - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4fv(VS::ARRAY_COLOR, 
line->color.components); + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4fv(VS::ARRAY_COLOR, line->color.components); - state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, state.uniforms.modelview_matrix); + state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, state.uniforms.modelview_matrix); - if (line->width <= 1) { - Vector2 verts[2] = { - Vector2(line->from.x, line->from.y), - Vector2(line->to.x, line->to.y) - }; + if (line->width <= 1) { + Vector2 verts[2] = { + Vector2(line->from.x, line->from.y), + Vector2(line->to.x, line->to.y) + }; #ifdef GLES_OVER_GL - if (line->antialiased) - glEnable(GL_LINE_SMOOTH); + if (line->antialiased) + glEnable(GL_LINE_SMOOTH); #endif - _draw_gui_primitive(2, verts, NULL, NULL); + _draw_gui_primitive(2, verts, NULL, NULL); #ifdef GLES_OVER_GL - if (line->antialiased) - glDisable(GL_LINE_SMOOTH); + if (line->antialiased) + glDisable(GL_LINE_SMOOTH); #endif - } else { - Vector2 t = (line->from - line->to).normalized().tangent() * line->width * 0.5; + } else { + Vector2 t = (line->from - line->to).normalized().tangent() * line->width * 0.5; - Vector2 verts[4] = { - line->from - t, - line->from + t, - line->to + t, - line->to - t - }; + Vector2 verts[4] = { + line->from - t, + line->from + t, + line->to + t, + line->to - t + }; - _draw_gui_primitive(4, verts, NULL, NULL); + _draw_gui_primitive(4, verts, NULL, NULL); #ifdef GLES_OVER_GL - if (line->antialiased) { - glEnable(GL_LINE_SMOOTH); - for (int j = 0; j < 4; j++) { - Vector2 vertsl[2] = { - verts[j], - verts[(j + 1) % 4], - }; - _draw_gui_primitive(2, vertsl, NULL, NULL); - } - glDisable(GL_LINE_SMOOTH); - } + if (line->antialiased) { + glEnable(GL_LINE_SMOOTH); + for (int j = 0; j < 4; j++) { + Vector2 vertsl[2] = { + verts[j], + verts[(j + 1) % 4], + }; + _draw_gui_primitive(2, vertsl, NULL, NULL); + } + glDisable(GL_LINE_SMOOTH); + } #endif - } - } break; + } + } break; - case Item::Command::TYPE_RECT: { + case 
Item::Command::TYPE_RECT: { - Item::CommandRect *r = static_cast(command); + Item::CommandRect *r = static_cast(command); - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4fv(VS::ARRAY_COLOR, r->modulate.components); + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4fv(VS::ARRAY_COLOR, r->modulate.components); - bool can_tile = true; - if (r->texture.is_valid() && r->flags & CANVAS_RECT_TILE && !storage->config.support_npot_repeat_mipmap) { - // workaround for when setting tiling does not work due to hardware limitation + bool can_tile = true; + if (r->texture.is_valid() && r->flags & CANVAS_RECT_TILE && !storage->config.support_npot_repeat_mipmap) { + // workaround for when setting tiling does not work due to hardware limitation - RasterizerStorageGLES2::Texture *texture = storage->texture_owner.getornull(r->texture); + RasterizerStorageGLES2::Texture *texture = storage->texture_owner.getornull(r->texture); - if (texture) { + if (texture) { - texture = texture->get_ptr(); + texture = texture->get_ptr(); - if (next_power_of_2(texture->alloc_width) != (unsigned int)texture->alloc_width && next_power_of_2(texture->alloc_height) != (unsigned int)texture->alloc_height) { - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, true); - can_tile = false; - } - } - } + if (next_power_of_2(texture->alloc_width) != (unsigned int)texture->alloc_width && next_power_of_2(texture->alloc_height) != (unsigned int)texture->alloc_height) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, true); + can_tile = false; + } + } + } - // On some widespread Nvidia cards, the normal draw method can produce some - // flickering in draw_rect and especially TileMap rendering (tiles randomly flicker). - // See GH-9913. - // To work it around, we use a simpler draw method which does not flicker, but gives - // a non negligible performance hit, so it's opt-in (GH-24466). 
- if (use_nvidia_rect_workaround) { - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + // On some widespread Nvidia cards, the normal draw method can produce some + // flickering in draw_rect and especially TileMap rendering (tiles randomly flicker). + // See GH-9913. + // To work it around, we use a simpler draw method which does not flicker, but gives + // a non negligible performance hit, so it's opt-in (GH-24466). + if (use_nvidia_rect_workaround) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - Vector2 points[4] = { - r->rect.position, - r->rect.position + Vector2(r->rect.size.x, 0.0), - r->rect.position + r->rect.size, - r->rect.position + Vector2(0.0, r->rect.size.y), - }; + Vector2 points[4] = { + r->rect.position, + r->rect.position + Vector2(r->rect.size.x, 0.0), + r->rect.position + r->rect.size, + r->rect.position + Vector2(0.0, r->rect.size.y), + }; - if (r->rect.size.x < 0) { - SWAP(points[0], points[1]); - SWAP(points[2], points[3]); - } - if (r->rect.size.y < 0) { - SWAP(points[0], points[3]); - SWAP(points[1], points[2]); - } + if (r->rect.size.x < 0) { + SWAP(points[0], points[1]); + SWAP(points[2], points[3]); + } + if (r->rect.size.y < 0) { + SWAP(points[0], points[3]); + SWAP(points[1], points[2]); + } - RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(r->texture, r->normal_map); - - if (texture) { - Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - - Rect2 src_rect = (r->flags & CANVAS_RECT_REGION) ? 
Rect2(r->source.position * texpixel_size, r->source.size * texpixel_size) : Rect2(0, 0, 1, 1); - - Vector2 uvs[4] = { - src_rect.position, - src_rect.position + Vector2(src_rect.size.x, 0.0), - src_rect.position + src_rect.size, - src_rect.position + Vector2(0.0, src_rect.size.y), - }; - - if (r->flags & CANVAS_RECT_TRANSPOSE) { - SWAP(uvs[1], uvs[3]); - } - - if (r->flags & CANVAS_RECT_FLIP_H) { - SWAP(uvs[0], uvs[1]); - SWAP(uvs[2], uvs[3]); - } - if (r->flags & CANVAS_RECT_FLIP_V) { - SWAP(uvs[0], uvs[3]); - SWAP(uvs[1], uvs[2]); - } - - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - - bool untile = false; - - if (can_tile && r->flags & CANVAS_RECT_TILE && !(texture->flags & VS::TEXTURE_FLAG_REPEAT)) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - untile = true; - } - - _draw_gui_primitive(4, points, NULL, uvs); - - if (untile) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - } - } else { - static const Vector2 uvs[4] = { - Vector2(0.0, 0.0), - Vector2(0.0, 1.0), - Vector2(1.0, 1.0), - Vector2(1.0, 0.0), - }; - - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, Vector2()); - _draw_gui_primitive(4, points, NULL, uvs); - } + RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(r->texture, r->normal_map); - } else { - // This branch is better for performance, but can produce flicker on Nvidia, see above comment. - _bind_quad_buffer(); + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); + Rect2 src_rect = (r->flags & CANVAS_RECT_REGION) ? 
Rect2(r->source.position * texpixel_size, r->source.size * texpixel_size) : Rect2(0, 0, 1, 1); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + Vector2 uvs[4] = { + src_rect.position, + src_rect.position + Vector2(src_rect.size.x, 0.0), + src_rect.position + src_rect.size, + src_rect.position + Vector2(0.0, src_rect.size.y), + }; + + if (r->flags & CANVAS_RECT_TRANSPOSE) { + SWAP(uvs[1], uvs[3]); + } - RasterizerStorageGLES2::Texture *tex = _bind_canvas_texture(r->texture, r->normal_map); + if (r->flags & CANVAS_RECT_FLIP_H) { + SWAP(uvs[0], uvs[1]); + SWAP(uvs[2], uvs[3]); + } + if (r->flags & CANVAS_RECT_FLIP_V) { + SWAP(uvs[0], uvs[3]); + SWAP(uvs[1], uvs[2]); + } - if (!tex) { - Rect2 dst_rect = Rect2(r->rect.position, r->rect.size); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - if (dst_rect.size.width < 0) { - dst_rect.position.x += dst_rect.size.width; - dst_rect.size.width *= -1; - } - if (dst_rect.size.height < 0) { - dst_rect.position.y += dst_rect.size.height; - dst_rect.size.height *= -1; - } + bool untile = false; - state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(dst_rect.position.x, dst_rect.position.y, dst_rect.size.x, dst_rect.size.y)); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(0, 0, 1, 1)); + if (can_tile && r->flags & CANVAS_RECT_TILE && !(texture->flags & VS::TEXTURE_FLAG_REPEAT)) { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + untile = true; + } - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - } else { + _draw_gui_primitive(4, points, NULL, uvs); - bool untile = false; + if (untile) { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + } else { + static const Vector2 uvs[4] = { + Vector2(0.0, 0.0), + Vector2(0.0, 1.0), + 
Vector2(1.0, 1.0), + Vector2(1.0, 0.0), + }; + + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, Vector2()); + _draw_gui_primitive(4, points, NULL, uvs); + } - if (can_tile && r->flags & CANVAS_RECT_TILE && !(tex->flags & VS::TEXTURE_FLAG_REPEAT)) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - untile = true; - } + } else { + // This branch is better for performance, but can produce flicker on Nvidia, see above comment. + _bind_quad_buffer(); - Size2 texpixel_size(1.0 / tex->width, 1.0 / tex->height); - Rect2 src_rect = (r->flags & CANVAS_RECT_REGION) ? Rect2(r->source.position * texpixel_size, r->source.size * texpixel_size) : Rect2(0, 0, 1, 1); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); - Rect2 dst_rect = Rect2(r->rect.position, r->rect.size); + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - if (dst_rect.size.width < 0) { - dst_rect.position.x += dst_rect.size.width; - dst_rect.size.width *= -1; - } - if (dst_rect.size.height < 0) { - dst_rect.position.y += dst_rect.size.height; - dst_rect.size.height *= -1; - } + RasterizerStorageGLES2::Texture *tex = _bind_canvas_texture(r->texture, r->normal_map); - if (r->flags & CANVAS_RECT_FLIP_H) { - src_rect.size.x *= -1; - } + if (!tex) { + Rect2 dst_rect = Rect2(r->rect.position, r->rect.size); - if (r->flags & CANVAS_RECT_FLIP_V) { - src_rect.size.y *= -1; - } + if (dst_rect.size.width < 0) { + dst_rect.position.x += dst_rect.size.width; + dst_rect.size.width *= -1; + } + if (dst_rect.size.height < 0) { + dst_rect.position.y += dst_rect.size.height; + dst_rect.size.height *= -1; + } - if (r->flags & CANVAS_RECT_TRANSPOSE) { - dst_rect.size.x *= -1; // Encoding in the dst_rect.z uniform - } + state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(dst_rect.position.x, dst_rect.position.y, 
dst_rect.size.x, dst_rect.size.y)); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(0, 0, 1, 1)); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + } else { - state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(dst_rect.position.x, dst_rect.position.y, dst_rect.size.x, dst_rect.size.y)); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(src_rect.position.x, src_rect.position.y, src_rect.size.x, src_rect.size.y)); + bool untile = false; - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + if (can_tile && r->flags & CANVAS_RECT_TILE && !(tex->flags & VS::TEXTURE_FLAG_REPEAT)) { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + untile = true; + } - if (untile) { - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - } - } + Size2 texpixel_size(1.0 / tex->width, 1.0 / tex->height); + Rect2 src_rect = (r->flags & CANVAS_RECT_REGION) ? 
Rect2(r->source.position * texpixel_size, r->source.size * texpixel_size) : Rect2(0, 0, 1, 1); - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - } + Rect2 dst_rect = Rect2(r->rect.position, r->rect.size); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, false); + if (dst_rect.size.width < 0) { + dst_rect.position.x += dst_rect.size.width; + dst_rect.size.width *= -1; + } + if (dst_rect.size.height < 0) { + dst_rect.position.y += dst_rect.size.height; + dst_rect.size.height *= -1; + } - } break; + if (r->flags & CANVAS_RECT_FLIP_H) { + src_rect.size.x *= -1; + } - case Item::Command::TYPE_NINEPATCH: { + if (r->flags & CANVAS_RECT_FLIP_V) { + src_rect.size.y *= -1; + } - Item::CommandNinePatch *np = static_cast(command); + if (r->flags & CANVAS_RECT_TRANSPOSE) { + dst_rect.size.x *= -1; // Encoding in the dst_rect.z uniform + } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - glDisableVertexAttribArray(VS::ARRAY_COLOR); - glVertexAttrib4fv(VS::ARRAY_COLOR, np->color.components); + state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(dst_rect.position.x, dst_rect.position.y, dst_rect.size.x, dst_rect.size.y)); + state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(src_rect.position.x, src_rect.position.y, src_rect.size.x, src_rect.size.y)); - RasterizerStorageGLES2::Texture *tex = _bind_canvas_texture(np->texture, np->normal_map); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - if (!tex) { - // FIXME: Handle textureless ninepatch gracefully - WARN_PRINT("NinePatch without texture not supported yet in GLES2 backend, skipping."); - continue; - } - if (tex->width == 0 || tex->height == 0) { - WARN_PRINT("Cannot set empty texture to NinePatch."); - continue; 
- } + if (untile) { + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + } - Size2 texpixel_size(1.0 / tex->width, 1.0 / tex->height); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } - // state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, state.uniforms.modelview_matrix); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, false); - Rect2 source = np->source; - if (source.size.x == 0 && source.size.y == 0) { - source.size.x = tex->width; - source.size.y = tex->height; - } + } break; - float screen_scale = 1.0; + case Item::Command::TYPE_NINEPATCH: { - if (source.size.x != 0 && source.size.y != 0) { + Item::CommandNinePatch *np = static_cast(command); - screen_scale = MIN(np->rect.size.x / source.size.x, np->rect.size.y / source.size.y); - screen_scale = MIN(1.0, screen_scale); - } + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - // prepare vertex buffer + glDisableVertexAttribArray(VS::ARRAY_COLOR); + glVertexAttrib4fv(VS::ARRAY_COLOR, np->color.components); - // this buffer contains [ POS POS UV UV ] * + RasterizerStorageGLES2::Texture *tex = _bind_canvas_texture(np->texture, np->normal_map); - float buffer[16 * 2 + 16 * 2]; + if (!tex) { + // FIXME: Handle textureless ninepatch gracefully + WARN_PRINT("NinePatch without texture not supported yet in GLES2 backend, skipping."); + continue; + } + if (tex->width == 0 || tex->height == 0) { + WARN_PRINT("Cannot set empty texture to NinePatch."); + continue; + } - { + Size2 texpixel_size(1.0 / tex->width, 1.0 / tex->height); - // first row + // state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX, 
state.uniforms.modelview_matrix); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - buffer[(0 * 4 * 4) + 0] = np->rect.position.x; - buffer[(0 * 4 * 4) + 1] = np->rect.position.y; + Rect2 source = np->source; + if (source.size.x == 0 && source.size.y == 0) { + source.size.x = tex->width; + source.size.y = tex->height; + } - buffer[(0 * 4 * 4) + 2] = source.position.x * texpixel_size.x; - buffer[(0 * 4 * 4) + 3] = source.position.y * texpixel_size.y; + float screen_scale = 1.0; - buffer[(0 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; - buffer[(0 * 4 * 4) + 5] = np->rect.position.y; + if (source.size.x != 0 && source.size.y != 0) { - buffer[(0 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; - buffer[(0 * 4 * 4) + 7] = source.position.y * texpixel_size.y; + screen_scale = MIN(np->rect.size.x / source.size.x, np->rect.size.y / source.size.y); + screen_scale = MIN(1.0, screen_scale); + } - buffer[(0 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; - buffer[(0 * 4 * 4) + 9] = np->rect.position.y; + // prepare vertex buffer - buffer[(0 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; - buffer[(0 * 4 * 4) + 11] = source.position.y * texpixel_size.y; + // this buffer contains [ POS POS UV UV ] * - buffer[(0 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; - buffer[(0 * 4 * 4) + 13] = np->rect.position.y; + float buffer[16 * 2 + 16 * 2]; - buffer[(0 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; - buffer[(0 * 4 * 4) + 15] = source.position.y * texpixel_size.y; + { - // second row + // first row - buffer[(1 * 4 * 4) + 0] = np->rect.position.x; - buffer[(1 * 4 * 4) + 1] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; + buffer[(0 * 4 * 4) + 0] = np->rect.position.x; + buffer[(0 * 4 * 4) + 1] = np->rect.position.y; - buffer[(1 * 4 
* 4) + 2] = source.position.x * texpixel_size.x; - buffer[(1 * 4 * 4) + 3] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; + buffer[(0 * 4 * 4) + 2] = source.position.x * texpixel_size.x; + buffer[(0 * 4 * 4) + 3] = source.position.y * texpixel_size.y; - buffer[(1 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; - buffer[(1 * 4 * 4) + 5] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; + buffer[(0 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; + buffer[(0 * 4 * 4) + 5] = np->rect.position.y; - buffer[(1 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; - buffer[(1 * 4 * 4) + 7] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; + buffer[(0 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; + buffer[(0 * 4 * 4) + 7] = source.position.y * texpixel_size.y; - buffer[(1 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; - buffer[(1 * 4 * 4) + 9] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; + buffer[(0 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; + buffer[(0 * 4 * 4) + 9] = np->rect.position.y; - buffer[(1 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; - buffer[(1 * 4 * 4) + 11] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; + buffer[(0 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; + buffer[(0 * 4 * 4) + 11] = source.position.y * texpixel_size.y; - buffer[(1 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; - buffer[(1 * 4 * 4) + 13] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; + buffer[(0 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; + buffer[(0 * 4 * 4) + 13] = np->rect.position.y; - buffer[(1 * 4 * 4) + 14] = (source.position.x + 
source.size.x) * texpixel_size.x; - buffer[(1 * 4 * 4) + 15] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; + buffer[(0 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; + buffer[(0 * 4 * 4) + 15] = source.position.y * texpixel_size.y; - // third row + // second row - buffer[(2 * 4 * 4) + 0] = np->rect.position.x; - buffer[(2 * 4 * 4) + 1] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; + buffer[(1 * 4 * 4) + 0] = np->rect.position.x; + buffer[(1 * 4 * 4) + 1] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; - buffer[(2 * 4 * 4) + 2] = source.position.x * texpixel_size.x; - buffer[(2 * 4 * 4) + 3] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; + buffer[(1 * 4 * 4) + 2] = source.position.x * texpixel_size.x; + buffer[(1 * 4 * 4) + 3] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; - buffer[(2 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; - buffer[(2 * 4 * 4) + 5] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; + buffer[(1 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; + buffer[(1 * 4 * 4) + 5] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; - buffer[(2 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; - buffer[(2 * 4 * 4) + 7] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; + buffer[(1 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; + buffer[(1 * 4 * 4) + 7] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; - buffer[(2 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; - buffer[(2 * 4 * 4) + 9] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; + buffer[(1 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - 
np->margin[MARGIN_RIGHT] * screen_scale; + buffer[(1 * 4 * 4) + 9] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; - buffer[(2 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; - buffer[(2 * 4 * 4) + 11] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; + buffer[(1 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; + buffer[(1 * 4 * 4) + 11] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; - buffer[(2 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; - buffer[(2 * 4 * 4) + 13] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; + buffer[(1 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; + buffer[(1 * 4 * 4) + 13] = np->rect.position.y + np->margin[MARGIN_TOP] * screen_scale; - buffer[(2 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; - buffer[(2 * 4 * 4) + 15] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; + buffer[(1 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; + buffer[(1 * 4 * 4) + 15] = (source.position.y + np->margin[MARGIN_TOP]) * texpixel_size.y; - // fourth row + // third row - buffer[(3 * 4 * 4) + 0] = np->rect.position.x; - buffer[(3 * 4 * 4) + 1] = np->rect.position.y + np->rect.size.y; + buffer[(2 * 4 * 4) + 0] = np->rect.position.x; + buffer[(2 * 4 * 4) + 1] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; - buffer[(3 * 4 * 4) + 2] = source.position.x * texpixel_size.x; - buffer[(3 * 4 * 4) + 3] = (source.position.y + source.size.y) * texpixel_size.y; + buffer[(2 * 4 * 4) + 2] = source.position.x * texpixel_size.x; + buffer[(2 * 4 * 4) + 3] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; - buffer[(3 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; - buffer[(3 
* 4 * 4) + 5] = np->rect.position.y + np->rect.size.y; + buffer[(2 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; + buffer[(2 * 4 * 4) + 5] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; - buffer[(3 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; - buffer[(3 * 4 * 4) + 7] = (source.position.y + source.size.y) * texpixel_size.y; + buffer[(2 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; + buffer[(2 * 4 * 4) + 7] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; - buffer[(3 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; - buffer[(3 * 4 * 4) + 9] = np->rect.position.y + np->rect.size.y; + buffer[(2 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; + buffer[(2 * 4 * 4) + 9] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; - buffer[(3 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; - buffer[(3 * 4 * 4) + 11] = (source.position.y + source.size.y) * texpixel_size.y; + buffer[(2 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; + buffer[(2 * 4 * 4) + 11] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; - buffer[(3 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; - buffer[(3 * 4 * 4) + 13] = np->rect.position.y + np->rect.size.y; + buffer[(2 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; + buffer[(2 * 4 * 4) + 13] = np->rect.position.y + np->rect.size.y - np->margin[MARGIN_BOTTOM] * screen_scale; - buffer[(3 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; - buffer[(3 * 4 * 4) + 15] = (source.position.y + source.size.y) * texpixel_size.y; - } + buffer[(2 * 4 * 4) + 14] = (source.position.x + source.size.x) 
* texpixel_size.x; + buffer[(2 * 4 * 4) + 15] = (source.position.y + source.size.y - np->margin[MARGIN_BOTTOM]) * texpixel_size.y; - glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); - glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, GL_DYNAMIC_DRAW); + // fourth row - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); + buffer[(3 * 4 * 4) + 0] = np->rect.position.x; + buffer[(3 * 4 * 4) + 1] = np->rect.position.y + np->rect.size.y; - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glEnableVertexAttribArray(VS::ARRAY_TEX_UV); + buffer[(3 * 4 * 4) + 2] = source.position.x * texpixel_size.x; + buffer[(3 * 4 * 4) + 3] = (source.position.y + source.size.y) * texpixel_size.y; - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), NULL); - glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), CAST_INT_TO_UCHAR_PTR((sizeof(float) * 2))); + buffer[(3 * 4 * 4) + 4] = np->rect.position.x + np->margin[MARGIN_LEFT] * screen_scale; + buffer[(3 * 4 * 4) + 5] = np->rect.position.y + np->rect.size.y; - glDrawElements(GL_TRIANGLES, 18 * 3 - (np->draw_center ? 
0 : 6), GL_UNSIGNED_BYTE, NULL); + buffer[(3 * 4 * 4) + 6] = (source.position.x + np->margin[MARGIN_LEFT]) * texpixel_size.x; + buffer[(3 * 4 * 4) + 7] = (source.position.y + source.size.y) * texpixel_size.y; - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + buffer[(3 * 4 * 4) + 8] = np->rect.position.x + np->rect.size.x - np->margin[MARGIN_RIGHT] * screen_scale; + buffer[(3 * 4 * 4) + 9] = np->rect.position.y + np->rect.size.y; - } break; + buffer[(3 * 4 * 4) + 10] = (source.position.x + source.size.x - np->margin[MARGIN_RIGHT]) * texpixel_size.x; + buffer[(3 * 4 * 4) + 11] = (source.position.y + source.size.y) * texpixel_size.y; - case Item::Command::TYPE_CIRCLE: { + buffer[(3 * 4 * 4) + 12] = np->rect.position.x + np->rect.size.x; + buffer[(3 * 4 * 4) + 13] = np->rect.position.y + np->rect.size.y; - Item::CommandCircle *circle = static_cast(command); + buffer[(3 * 4 * 4) + 14] = (source.position.x + source.size.x) * texpixel_size.x; + buffer[(3 * 4 * 4) + 15] = (source.position.y + source.size.y) * texpixel_size.y; + } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, GL_DYNAMIC_DRAW); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); - static const int num_points = 32; + glEnableVertexAttribArray(VS::ARRAY_VERTEX); + glEnableVertexAttribArray(VS::ARRAY_TEX_UV); - Vector2 points[num_points + 1]; - points[num_points] = circle->pos; + glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), NULL); + glVertexAttribPointer(VS::ARRAY_TEX_UV, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), CAST_INT_TO_UCHAR_PTR((sizeof(float) * 2))); - int indices[num_points * 3]; + glDrawElements(GL_TRIANGLES, 18 * 3 - (np->draw_center ? 
0 : 6), GL_UNSIGNED_BYTE, NULL); - for (int j = 0; j < num_points; j++) { - points[j] = circle->pos + Vector2(Math::sin(j * Math_PI * 2.0 / num_points), Math::cos(j * Math_PI * 2.0 / num_points)) * circle->radius; - indices[j * 3 + 0] = j; - indices[j * 3 + 1] = (j + 1) % num_points; - indices[j * 3 + 2] = num_points; - } + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - _bind_canvas_texture(RID(), RID()); + } break; - _draw_polygon(indices, num_points * 3, num_points + 1, points, NULL, &circle->color, true); - } break; + case Item::Command::TYPE_CIRCLE: { - case Item::Command::TYPE_POLYGON: { + Item::CommandCircle *circle = static_cast(command); - Item::CommandPolygon *polygon = static_cast(command); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + static const int num_points = 32; - RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(polygon->texture, polygon->normal_map); + Vector2 points[num_points + 1]; + points[num_points] = circle->pos; - if (texture) { - Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - } + int indices[num_points * 3]; - _draw_polygon(polygon->indices.ptr(), polygon->count, polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1, polygon->weights.ptr(), polygon->bones.ptr()); -#ifdef GLES_OVER_GL - if (polygon->antialiased) { - glEnable(GL_LINE_SMOOTH); - if (polygon->antialiasing_use_indices) { - _draw_generic_indices(GL_LINE_STRIP, polygon->indices.ptr(), polygon->count, 
polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1); - } else { - _draw_generic(GL_LINE_LOOP, polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1); - } - glDisable(GL_LINE_SMOOTH); - } -#endif - } break; - case Item::Command::TYPE_MESH: { + for (int j = 0; j < num_points; j++) { + points[j] = circle->pos + Vector2(Math::sin(j * Math_PI * 2.0 / num_points), Math::cos(j * Math_PI * 2.0 / num_points)) * circle->radius; + indices[j * 3 + 0] = j; + indices[j * 3 + 1] = (j + 1) % num_points; + indices[j * 3 + 2] = num_points; + } - Item::CommandMesh *mesh = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _bind_canvas_texture(RID(), RID()); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + _draw_polygon(indices, num_points * 3, num_points + 1, points, NULL, &circle->color, true); + } break; - RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(mesh->texture, mesh->normal_map); + case Item::Command::TYPE_POLYGON: { - if (texture) { - Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - } + Item::CommandPolygon *polygon = static_cast(command); - RasterizerStorageGLES2::Mesh *mesh_data = storage->mesh_owner.getornull(mesh->mesh); - if (mesh_data) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - for (int j = 0; j < mesh_data->surfaces.size(); j++) { - RasterizerStorageGLES2::Surface *s = mesh_data->surfaces[j]; - // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing + if (state.canvas_shader.bind()) { + _set_uniforms(); + 
state.canvas_shader.use_material((void *)p_material); + } - glBindBuffer(GL_ARRAY_BUFFER, s->vertex_id); + RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(polygon->texture, polygon->normal_map); - if (s->index_array_len > 0) { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_id); - } + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + } - for (int k = 0; k < VS::ARRAY_MAX - 1; k++) { - if (s->attribs[k].enabled) { - glEnableVertexAttribArray(k); - glVertexAttribPointer(s->attribs[k].index, s->attribs[k].size, s->attribs[k].type, s->attribs[k].normalized, s->attribs[k].stride, CAST_INT_TO_UCHAR_PTR(s->attribs[k].offset)); - } else { - glDisableVertexAttribArray(k); - switch (k) { - case VS::ARRAY_NORMAL: { - glVertexAttrib4f(VS::ARRAY_NORMAL, 0.0, 0.0, 1, 1); - } break; - case VS::ARRAY_COLOR: { - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - - } break; - default: { - } + _draw_polygon(polygon->indices.ptr(), polygon->count, polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1, polygon->weights.ptr(), polygon->bones.ptr()); +#ifdef GLES_OVER_GL + if (polygon->antialiased) { + glEnable(GL_LINE_SMOOTH); + if (polygon->antialiasing_use_indices) { + _draw_generic_indices(GL_LINE_STRIP, polygon->indices.ptr(), polygon->count, polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1); + } else { + _draw_generic(GL_LINE_LOOP, polygon->points.size(), polygon->points.ptr(), polygon->uvs.ptr(), polygon->colors.ptr(), polygon->colors.size() == 1); } + glDisable(GL_LINE_SMOOTH); } - } +#endif + } break; + case Item::Command::TYPE_MESH: { - if (s->index_array_len > 0) { - glDrawElements(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? 
GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0); - } else { - glDrawArrays(gl_primitive[s->primitive], 0, s->array_len); - } - } + Item::CommandMesh *mesh = static_cast(command); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - for (int j = 1; j < VS::ARRAY_MAX - 1; j++) { - glDisableVertexAttribArray(j); - } - } + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - } break; - case Item::Command::TYPE_MULTIMESH: { - Item::CommandMultiMesh *mmesh = static_cast(command); + RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(mesh->texture, mesh->normal_map); - RasterizerStorageGLES2::MultiMesh *multi_mesh = storage->multimesh_owner.getornull(mmesh->multimesh); + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + } - if (!multi_mesh) - break; + RasterizerStorageGLES2::Mesh *mesh_data = storage->mesh_owner.getornull(mesh->mesh); + if (mesh_data) { - RasterizerStorageGLES2::Mesh *mesh_data = storage->mesh_owner.getornull(multi_mesh->mesh); + for (int j = 0; j < mesh_data->surfaces.size(); j++) { + RasterizerStorageGLES2::Surface *s = mesh_data->surfaces[j]; + // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing - if (!mesh_data) - break; + glBindBuffer(GL_ARRAY_BUFFER, s->vertex_id); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + if (s->index_array_len > 0) { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_id); + } - if (state.canvas_shader.bind()) { - 
_set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + for (int k = 0; k < VS::ARRAY_MAX - 1; k++) { + if (s->attribs[k].enabled) { + glEnableVertexAttribArray(k); + glVertexAttribPointer(s->attribs[k].index, s->attribs[k].size, s->attribs[k].type, s->attribs[k].normalized, s->attribs[k].stride, CAST_INT_TO_UCHAR_PTR(s->attribs[k].offset)); + } else { + glDisableVertexAttribArray(k); + switch (k) { + case VS::ARRAY_NORMAL: { + glVertexAttrib4f(VS::ARRAY_NORMAL, 0.0, 0.0, 1, 1); + } break; + case VS::ARRAY_COLOR: { + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + + } break; + default: { + } + } + } + } - RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(mmesh->texture, mmesh->normal_map); + if (s->index_array_len > 0) { + glDrawElements(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0); + } else { + glDrawArrays(gl_primitive[s->primitive], 0, s->array_len); + } + } - if (texture) { - Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - } + for (int j = 1; j < VS::ARRAY_MAX - 1; j++) { + glDisableVertexAttribArray(j); + } + } - //reset shader and force rebind + } break; + case Item::Command::TYPE_MULTIMESH: { + Item::CommandMultiMesh *mmesh = static_cast(command); - int amount = MIN(multi_mesh->size, multi_mesh->visible_instances); + RasterizerStorageGLES2::MultiMesh *multi_mesh = storage->multimesh_owner.getornull(mmesh->multimesh); - if (amount == -1) { - amount = multi_mesh->size; - } + if (!multi_mesh) + break; + + RasterizerStorageGLES2::Mesh *mesh_data = storage->mesh_owner.getornull(multi_mesh->mesh); - int stride = multi_mesh->color_floats + multi_mesh->custom_data_floats + multi_mesh->xform_floats; + if (!mesh_data) + break; - int color_ofs = multi_mesh->xform_floats; - int custom_data_ofs = color_ofs + multi_mesh->color_floats; + 
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - // drawing + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - const float *base_buffer = multi_mesh->data.ptr(); + RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(mmesh->texture, mmesh->normal_map); - for (int j = 0; j < mesh_data->surfaces.size(); j++) { - RasterizerStorageGLES2::Surface *s = mesh_data->surfaces[j]; - // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + } - //bind buffers for mesh surface - glBindBuffer(GL_ARRAY_BUFFER, s->vertex_id); + //reset shader and force rebind - if (s->index_array_len > 0) { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_id); - } + int amount = MIN(multi_mesh->size, multi_mesh->visible_instances); - for (int k = 0; k < VS::ARRAY_MAX - 1; k++) { - if (s->attribs[k].enabled) { - glEnableVertexAttribArray(k); - glVertexAttribPointer(s->attribs[k].index, s->attribs[k].size, s->attribs[k].type, s->attribs[k].normalized, s->attribs[k].stride, CAST_INT_TO_UCHAR_PTR(s->attribs[k].offset)); - } else { - glDisableVertexAttribArray(k); - switch (k) { - case VS::ARRAY_NORMAL: { - glVertexAttrib4f(VS::ARRAY_NORMAL, 0.0, 0.0, 1, 1); - } break; - case VS::ARRAY_COLOR: { - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - - } break; - default: { - } + if (amount == -1) { + amount = multi_mesh->size; } - } - } - for (int k = 0; k < amount; k++) { - 
const float *buffer = base_buffer + k * stride; + int stride = multi_mesh->color_floats + multi_mesh->custom_data_floats + multi_mesh->xform_floats; - { + int color_ofs = multi_mesh->xform_floats; + int custom_data_ofs = color_ofs + multi_mesh->color_floats; - glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 0, &buffer[0]); - glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 1, &buffer[4]); - if (multi_mesh->transform_format == VS::MULTIMESH_TRANSFORM_3D) { - glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 2, &buffer[8]); - } else { - glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 2, 0.0, 0.0, 1.0, 0.0); - } - } + // drawing - if (multi_mesh->color_floats) { - if (multi_mesh->color_format == VS::MULTIMESH_COLOR_8BIT) { - uint8_t *color_data = (uint8_t *)(buffer + color_ofs); - glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 3, color_data[0] / 255.0, color_data[1] / 255.0, color_data[2] / 255.0, color_data[3] / 255.0); - } else { - glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 3, buffer + color_ofs); - } - } else { - glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 3, 1.0, 1.0, 1.0, 1.0); - } - - if (multi_mesh->custom_data_floats) { - if (multi_mesh->custom_data_format == VS::MULTIMESH_CUSTOM_DATA_8BIT) { - uint8_t *custom_data = (uint8_t *)(buffer + custom_data_ofs); - glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 4, custom_data[0] / 255.0, custom_data[1] / 255.0, custom_data[2] / 255.0, custom_data[3] / 255.0); - } else { - glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 4, buffer + custom_data_ofs); - } - } + const float *base_buffer = multi_mesh->data.ptr(); - if (s->index_array_len > 0) { - glDrawElements(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? 
GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0); - } else { - glDrawArrays(gl_primitive[s->primitive], 0, s->array_len); - } - } - } + for (int j = 0; j < mesh_data->surfaces.size(); j++) { + RasterizerStorageGLES2::Surface *s = mesh_data->surfaces[j]; + // materials are ignored in 2D meshes, could be added but many things (ie, lighting mode, reading from screen, etc) would break as they are not meant be set up at this point of drawing - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false); + //bind buffers for mesh surface + glBindBuffer(GL_ARRAY_BUFFER, s->vertex_id); - } break; - case Item::Command::TYPE_POLYLINE: { - Item::CommandPolyLine *pline = static_cast(command); + if (s->index_array_len > 0) { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_id); + } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + for (int k = 0; k < VS::ARRAY_MAX - 1; k++) { + if (s->attribs[k].enabled) { + glEnableVertexAttribArray(k); + glVertexAttribPointer(s->attribs[k].index, s->attribs[k].size, s->attribs[k].type, s->attribs[k].normalized, s->attribs[k].stride, CAST_INT_TO_UCHAR_PTR(s->attribs[k].offset)); + } else { + glDisableVertexAttribArray(k); + switch (k) { + case VS::ARRAY_NORMAL: { + glVertexAttrib4f(VS::ARRAY_NORMAL, 0.0, 0.0, 1, 1); + } break; + case VS::ARRAY_COLOR: { + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + + } break; + default: { + } + } + } + } - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + for (int k = 0; k < amount; k++) { + const float *buffer = base_buffer + k * stride; + + { + + glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 0, &buffer[0]); + glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 1, &buffer[4]); + if (multi_mesh->transform_format == VS::MULTIMESH_TRANSFORM_3D) { + glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 2, &buffer[8]); + } else { + 
glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 2, 0.0, 0.0, 1.0, 0.0); + } + } - _bind_canvas_texture(RID(), RID()); + if (multi_mesh->color_floats) { + if (multi_mesh->color_format == VS::MULTIMESH_COLOR_8BIT) { + uint8_t *color_data = (uint8_t *)(buffer + color_ofs); + glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 3, color_data[0] / 255.0, color_data[1] / 255.0, color_data[2] / 255.0, color_data[3] / 255.0); + } else { + glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 3, buffer + color_ofs); + } + } else { + glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 3, 1.0, 1.0, 1.0, 1.0); + } + + if (multi_mesh->custom_data_floats) { + if (multi_mesh->custom_data_format == VS::MULTIMESH_CUSTOM_DATA_8BIT) { + uint8_t *custom_data = (uint8_t *)(buffer + custom_data_ofs); + glVertexAttrib4f(INSTANCE_ATTRIB_BASE + 4, custom_data[0] / 255.0, custom_data[1] / 255.0, custom_data[2] / 255.0, custom_data[3] / 255.0); + } else { + glVertexAttrib4fv(INSTANCE_ATTRIB_BASE + 4, buffer + custom_data_ofs); + } + } + + if (s->index_array_len > 0) { + glDrawElements(gl_primitive[s->primitive], s->index_array_len, (s->array_len >= (1 << 16)) ? 
GL_UNSIGNED_INT : GL_UNSIGNED_SHORT, 0); + } else { + glDrawArrays(gl_primitive[s->primitive], 0, s->array_len); + } + } + } - if (pline->triangles.size()) { - _draw_generic(GL_TRIANGLE_STRIP, pline->triangles.size(), pline->triangles.ptr(), NULL, pline->triangle_colors.ptr(), pline->triangle_colors.size() == 1); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false); + + } break; + case Item::Command::TYPE_POLYLINE: { + Item::CommandPolyLine *pline = static_cast(command); + + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } + + _bind_canvas_texture(RID(), RID()); + + if (pline->triangles.size()) { + _draw_generic(GL_TRIANGLE_STRIP, pline->triangles.size(), pline->triangles.ptr(), NULL, pline->triangle_colors.ptr(), pline->triangle_colors.size() == 1); #ifdef GLES_OVER_GL - glEnable(GL_LINE_SMOOTH); - if (pline->multiline) { - //needs to be different - } else { - _draw_generic(GL_LINE_LOOP, pline->lines.size(), pline->lines.ptr(), NULL, pline->line_colors.ptr(), pline->line_colors.size() == 1); - } - glDisable(GL_LINE_SMOOTH); + glEnable(GL_LINE_SMOOTH); + if (pline->multiline) { + //needs to be different + } else { + _draw_generic(GL_LINE_LOOP, pline->lines.size(), pline->lines.ptr(), NULL, pline->line_colors.ptr(), pline->line_colors.size() == 1); + } + glDisable(GL_LINE_SMOOTH); #endif - } else { + } else { #ifdef GLES_OVER_GL - if (pline->antialiased) - glEnable(GL_LINE_SMOOTH); + if (pline->antialiased) + glEnable(GL_LINE_SMOOTH); #endif - if (pline->multiline) { - int todo = pline->lines.size() / 2; - int max_per_call = data.polygon_buffer_size / (sizeof(real_t) * 4); - int offset = 0; - - while (todo) { - int to_draw = MIN(max_per_call, todo); - _draw_generic(GL_LINES, to_draw * 2, 
&pline->lines.ptr()[offset], NULL, pline->line_colors.size() == 1 ? pline->line_colors.ptr() : &pline->line_colors.ptr()[offset], pline->line_colors.size() == 1); - todo -= to_draw; - offset += to_draw * 2; - } - } else { - _draw_generic(GL_LINES, pline->lines.size(), pline->lines.ptr(), NULL, pline->line_colors.ptr(), pline->line_colors.size() == 1); - } + if (pline->multiline) { + int todo = pline->lines.size() / 2; + int max_per_call = data.polygon_buffer_size / (sizeof(real_t) * 4); + int offset = 0; + + while (todo) { + int to_draw = MIN(max_per_call, todo); + _draw_generic(GL_LINES, to_draw * 2, &pline->lines.ptr()[offset], NULL, pline->line_colors.size() == 1 ? pline->line_colors.ptr() : &pline->line_colors.ptr()[offset], pline->line_colors.size() == 1); + todo -= to_draw; + offset += to_draw * 2; + } + } else { + _draw_generic(GL_LINES, pline->lines.size(), pline->lines.ptr(), NULL, pline->line_colors.ptr(), pline->line_colors.size() == 1); + } #ifdef GLES_OVER_GL - if (pline->antialiased) - glDisable(GL_LINE_SMOOTH); + if (pline->antialiased) + glDisable(GL_LINE_SMOOTH); #endif - } - } break; + } + } break; - case Item::Command::TYPE_PRIMITIVE: { + case Item::Command::TYPE_PRIMITIVE: { - Item::CommandPrimitive *primitive = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + Item::CommandPrimitive *primitive = static_cast(command); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); - if (state.canvas_shader.bind()) { - _set_uniforms(); - state.canvas_shader.use_material((void *)p_material); - } + if (state.canvas_shader.bind()) { + _set_uniforms(); + state.canvas_shader.use_material((void *)p_material); + } - ERR_CONTINUE(primitive->points.size() < 1); + ERR_CONTINUE(primitive->points.size() < 1); - RasterizerStorageGLES2::Texture *texture = _bind_canvas_texture(primitive->texture, primitive->normal_map); + RasterizerStorageGLES2::Texture *texture = 
_bind_canvas_texture(primitive->texture, primitive->normal_map); - if (texture) { - Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); - state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); - } + if (texture) { + Size2 texpixel_size(1.0 / texture->width, 1.0 / texture->height); + state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); + } - if (primitive->colors.size() == 1 && primitive->points.size() > 1) { - Color c = primitive->colors[0]; - glVertexAttrib4f(VS::ARRAY_COLOR, c.r, c.g, c.b, c.a); - } else if (primitive->colors.empty()) { - glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); - } + if (primitive->colors.size() == 1 && primitive->points.size() > 1) { + Color c = primitive->colors[0]; + glVertexAttrib4f(VS::ARRAY_COLOR, c.r, c.g, c.b, c.a); + } else if (primitive->colors.empty()) { + glVertexAttrib4f(VS::ARRAY_COLOR, 1, 1, 1, 1); + } - _draw_gui_primitive(primitive->points.size(), primitive->points.ptr(), primitive->colors.ptr(), primitive->uvs.ptr()); - } break; + _draw_gui_primitive(primitive->points.size(), primitive->points.ptr(), primitive->colors.ptr(), primitive->uvs.ptr()); + } break; - case Item::Command::TYPE_TRANSFORM: { - Item::CommandTransform *transform = static_cast(command); - state.uniforms.extra_matrix = transform->xform; - state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX, state.uniforms.extra_matrix); - } break; + case Item::Command::TYPE_TRANSFORM: { + Item::CommandTransform *transform = static_cast(command); + state.uniforms.extra_matrix = transform->xform; + state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX, state.uniforms.extra_matrix); + } break; - case Item::Command::TYPE_PARTICLES: { + case Item::Command::TYPE_PARTICLES: { - } break; + } break; + + case Item::Command::TYPE_CLIP_IGNORE: { + + Item::CommandClipIgnore *ci = static_cast(command); + if (p_current_clip) { + if (ci->ignore != r_reclip) { + if (ci->ignore) { + 
glDisable(GL_SCISSOR_TEST); + r_reclip = true; + } else { + glEnable(GL_SCISSOR_TEST); - case Item::Command::TYPE_CLIP_IGNORE: { + int x = p_current_clip->final_clip_rect.position.x; + int y = storage->frame.current_rt->height - (p_current_clip->final_clip_rect.position.y + p_current_clip->final_clip_rect.size.y); + int w = p_current_clip->final_clip_rect.size.x; + int h = p_current_clip->final_clip_rect.size.y; - Item::CommandClipIgnore *ci = static_cast(command); - if (current_clip) { - if (ci->ignore != reclip) { - if (ci->ignore) { - glDisable(GL_SCISSOR_TEST); - reclip = true; - } else { - glEnable(GL_SCISSOR_TEST); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) + y = p_current_clip->final_clip_rect.position.y; - int x = current_clip->final_clip_rect.position.x; - int y = storage->frame.current_rt->height - (current_clip->final_clip_rect.position.y + current_clip->final_clip_rect.size.y); - int w = current_clip->final_clip_rect.size.x; - int h = current_clip->final_clip_rect.size.y; + glScissor(x, y, w, h); - if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) - y = current_clip->final_clip_rect.position.y; + r_reclip = false; + } + } + } - glScissor(x, y, w, h); + } break; - reclip = false; - } + default: { + // FIXME: Proper error handling if relevant + //print_line("other"); + } break; } } - } break; - - default: { - // FIXME: Proper error handling if relevant - //print_line("other"); - } break; + } // default + break; } } + + // zero all the batch data ready for a new run + bdata.reset_flush(); } -void RasterizerCanvasGLES2::_copy_screen(const Rect2 &p_rect) { +void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { - if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_DIRECT_TO_SCREEN]) { - ERR_PRINT_ONCE("Cannot use screen texture copying in render target set to render 
direct to screen."); - return; - } + Item *item = 0; + Item *first_item = bdata.item_refs[p_bij.first_item_ref].item; - ERR_FAIL_COND_MSG(storage->frame.current_rt->copy_screen_effect.color == 0, "Can't use screen texture copying in a render target configured without copy buffers."); + FillState fill_state; + fill_state.reset(); + fill_state.use_hardware_transform = p_bij.use_hardware_transform(); - glDisable(GL_BLEND); + for (unsigned int i = 0; i < p_bij.num_item_refs; i++) { + item = bdata.item_refs[p_bij.first_item_ref + i].item; - Vector2 wh(storage->frame.current_rt->width, storage->frame.current_rt->height); + int command_count = item->commands.size(); + int command_start = 0; - Color copy_section(p_rect.position.x / wh.x, p_rect.position.y / wh.y, p_rect.size.x / wh.x, p_rect.size.y / wh.y); + while (command_start < command_count) { + // fill as many batches as possible (until all done, or the vertex buffer is full) + bool bFull = prefill_joined_item(fill_state, command_start, item, p_current_clip, r_reclip, p_material); - if (p_rect != Rect2()) { - storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_COPY_SECTION, true); + if (bFull) { + // always pass first item (commands for default are always first item) + flush_render_batches(first_item, p_current_clip, r_reclip, p_material); + fill_state.reset(); + fill_state.use_hardware_transform = p_bij.use_hardware_transform(); + } + } } - storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_NO_ALPHA, !state.using_transparent_rt); + // flush if any left + flush_render_batches(first_item, p_current_clip, r_reclip, p_material); +} - glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->copy_screen_effect.fbo); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->color); +void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { - storage->shaders.copy.bind(); - 
storage->shaders.copy.set_uniform(CopyShaderGLES2::COPY_SECTION, copy_section); + // some heuristic to decide whether to use colored verts. + // feel free to tweak this. + // this could use hysteresis, to prevent jumping between methods + // .. however probably not necessary + bdata.use_colored_vertices = false; - const Vector2 vertpos[4] = { - Vector2(-1, -1), - Vector2(-1, 1), - Vector2(1, 1), - Vector2(1, -1), - }; + // only check whether to convert if there are quads (prevent divide by zero) + if (bdata.total_quads) { + float ratio = (float)bdata.total_color_changes / (float)bdata.total_quads; - const Vector2 uvpos[4] = { - Vector2(0, 0), - Vector2(0, 1), - Vector2(1, 1), - Vector2(1, 0) - }; + // use bigger than or equal so that 0.0 threshold can force always using colored verts + if (ratio >= bdata.settings_colored_vertex_format_threshold) { + bdata.use_colored_vertices = true; - const int indexpos[6] = { - 0, 1, 2, - 2, 3, 0 - }; + // small perf cost versus going straight to colored verts (maybe around 10%) + // however more straightforward + _batch_translate_to_colored(); + } + } - _draw_polygon(indexpos, 6, 4, vertpos, uvpos, NULL, false); + // send buffers to opengl + _batch_upload_buffers(); - storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_COPY_SECTION, false); - storage->shaders.copy.set_conditional(CopyShaderGLES2::USE_NO_ALPHA, false); + Item::Command *const *commands = p_first_item->commands.ptr(); - glBindFramebuffer(GL_FRAMEBUFFER, storage->frame.current_rt->fbo); //back to front - glEnable(GL_BLEND); + render_batches(commands, p_current_clip, r_reclip, p_material); } -void RasterizerCanvasGLES2::_copy_texscreen(const Rect2 &p_rect) { +void RasterizerCanvasGLES2::_canvas_item_render_commands(Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { - state.canvas_texscreen_used = true; + int command_count = p_item->commands.size(); - _copy_screen(p_rect); + Item::Command *const *commands = 
p_item->commands.ptr(); - // back to canvas, force rebind - state.using_texture_rect = false; - state.canvas_shader.bind(); - _bind_canvas_texture(state.current_tex, state.current_normal); - _set_uniforms(); + // legacy .. just create one massive batch and render everything as before + bdata.batches.reset(); + Batch *batch = _batch_request_new(); + batch->type = Batch::BT_DEFAULT; + batch->num_commands = command_count; + + render_batches(commands, p_current_clip, r_reclip, p_material); +} + +void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { + bdata.items_joined.reset(); + bdata.item_refs.reset(); + + RenderItemState render_item_state; + render_item_state.item_group_z = p_z; + render_item_state.item_group_modulate = p_modulate; + render_item_state.item_group_light = p_light; + render_item_state.item_group_base_transform = p_base_transform; + + BItemJoined *j = 0; + + // join is whether to join to the previous batch. + // batch_break is whether to PREVENT the next batch from joining with us + bool batch_break = false; + + while (p_item_list) { + + Item *ci = p_item_list; + + bool join; + + if (batch_break) { + // always start a new batch for this item + join = false; + + // could be another batch break (i.e. prevent NEXT item from joining this) + // so we still need to run try_join_item + // even though we know join is false. + // also we need to run try_join_item for every item because it keeps the state up to date, + // if we didn't run it the state would be out of date. 
+ try_join_item(ci, render_item_state, batch_break); + } else { + join = try_join_item(ci, render_item_state, batch_break); + } + + // assume the first item will always return no join + if (!join) { + j = bdata.items_joined.request_with_grow(); + j->first_item_ref = bdata.item_refs.size(); + j->num_item_refs = 1; + j->bounding_rect = ci->global_rect_cache; + + // add the reference + BItemRef *r = bdata.item_refs.request_with_grow(); + r->item = ci; + } else { + CRASH_COND(j == 0); + j->num_item_refs += 1; + j->bounding_rect = j->bounding_rect.merge(ci->global_rect_cache); + + BItemRef *r = bdata.item_refs.request_with_grow(); + r->item = ci; + } + + p_item_list = p_item_list->next; + } } void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { - Item *current_clip = NULL; + // if we are debugging, flash each frame between batching renderer and old version to compare for regressions + if (bdata.settings_flash_batching) { + if ((Engine::get_singleton()->get_frames_drawn() % 2) == 0) + bdata.settings_use_batching = true; + else + bdata.settings_use_batching = false; + } + + // this only needs to be done when screen size changes, but this should be + // infrequent enough + _calculate_scissor_threshold_area(); + + // state 1 : join similar items, so that their state changes are not repeated, + // and commands from joined items can be batched together + if (bdata.settings_use_batching) + join_items(p_item_list, p_z, p_modulate, p_light, p_base_transform); + + canvas_render_items_implementation(p_item_list, p_z, p_modulate, p_light, p_base_transform); +} - RasterizerStorageGLES2::Shader *shader_cache = NULL; +void RasterizerCanvasGLES2::canvas_render_items_implementation(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { + + // parameters are easier to pass around in a structure + RenderItemState ris; + 
ris.item_group_z = p_z; + ris.item_group_modulate = p_modulate; + ris.item_group_light = p_light; + ris.item_group_base_transform = p_base_transform; - bool rebind_shader = true; - bool prev_use_skeleton = false; state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, false); state.current_tex = RID(); @@ -1442,787 +1588,1186 @@ void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, cons glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); - int last_blend_mode = -1; + if (bdata.settings_use_batching) { + for (int j = 0; j < bdata.items_joined.size(); j++) { + render_joined_item(bdata.items_joined[j], ris); + } + } else { + while (p_item_list) { - RID canvas_last_material = RID(); + Item *ci = p_item_list; + _canvas_render_item(ci, ris); + p_item_list = p_item_list->next; + } + } - while (p_item_list) { + if (ris.current_clip) { + glDisable(GL_SCISSOR_TEST); + } - Item *ci = p_item_list; + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, false); +} - if (current_clip != ci->final_clip_owner) { +// This function is a dry run of the state changes when drawing the item. +// It should duplicate the logic in _canvas_render_item, +// to decide whether items are similar enough to join +// i.e. no state differences between the 2 items. +bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break) { + + // if we set max join items to zero we can effectively prevent any joining, so + // none of the other logic needs to run. Good for testing regression bugs, and + // could conceivably be faster in some games. 
+ if (!bdata.settings_max_join_item_commands) { + return false; + } - current_clip = ci->final_clip_owner; + // if there are any state changes we change join to false + // we also set r_batch_break to true if we don't want this item joined + r_batch_break = false; + bool join = true; - if (current_clip) { - glEnable(GL_SCISSOR_TEST); - int y = storage->frame.current_rt->height - (current_clip->final_clip_rect.position.y + current_clip->final_clip_rect.size.y); - if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) - y = current_clip->final_clip_rect.position.y; - glScissor(current_clip->final_clip_rect.position.x, y, current_clip->final_clip_rect.size.width, current_clip->final_clip_rect.size.height); - } else { - glDisable(GL_SCISSOR_TEST); + // light_masked may possibly need state checking here. Check for regressions! + + if (p_ci->final_modulate != r_ris.final_modulate) { + join = false; + r_ris.final_modulate = p_ci->final_modulate; + } + + if (r_ris.current_clip != p_ci->final_clip_owner) { + r_ris.current_clip = p_ci->final_clip_owner; + join = false; + } + + // TODO: copy back buffer + + if (p_ci->copy_back_buffer) { + join = false; + } + + RasterizerStorageGLES2::Skeleton *skeleton = NULL; + + { + //skeleton handling + if (p_ci->skeleton.is_valid() && storage->skeleton_owner.owns(p_ci->skeleton)) { + skeleton = storage->skeleton_owner.get(p_ci->skeleton); + if (!skeleton->use_2d) { + skeleton = NULL; } } - // TODO: copy back buffer + bool use_skeleton = skeleton != NULL; + if (r_ris.prev_use_skeleton != use_skeleton) { + r_ris.rebind_shader = true; + r_ris.prev_use_skeleton = use_skeleton; + join = false; + } - if (ci->copy_back_buffer) { - if (ci->copy_back_buffer->full) { - _copy_texscreen(Rect2()); - } else { - _copy_texscreen(ci->copy_back_buffer->rect); - } + if (skeleton) { + join = false; + state.using_skeleton = true; + } else { + state.using_skeleton = false; } + } - RasterizerStorageGLES2::Skeleton *skeleton = NULL; + Item 
*material_owner = p_ci->material_owner ? p_ci->material_owner : p_ci; - { - //skeleton handling - if (ci->skeleton.is_valid() && storage->skeleton_owner.owns(ci->skeleton)) { - skeleton = storage->skeleton_owner.get(ci->skeleton); - if (!skeleton->use_2d) { - skeleton = NULL; - } else { - state.skeleton_transform = p_base_transform * skeleton->base_transform_2d; - state.skeleton_transform_inverse = state.skeleton_transform.affine_inverse(); - state.skeleton_texture_size = Vector2(skeleton->size * 2, 0); - } - } + RID material = material_owner->material; + RasterizerStorageGLES2::Material *material_ptr = storage->material_owner.getornull(material); + + if (material != r_ris.canvas_last_material || r_ris.rebind_shader) { - bool use_skeleton = skeleton != NULL; - if (prev_use_skeleton != use_skeleton) { - rebind_shader = true; - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, use_skeleton); - prev_use_skeleton = use_skeleton; + join = false; + RasterizerStorageGLES2::Shader *shader_ptr = NULL; + + if (material_ptr) { + shader_ptr = material_ptr->shader; + + if (shader_ptr && shader_ptr->mode != VS::SHADER_CANVAS_ITEM) { + shader_ptr = NULL; // not a canvas item shader, don't use. } + } - if (skeleton) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 3); - glBindTexture(GL_TEXTURE_2D, skeleton->tex_id); - state.using_skeleton = true; - } else { - state.using_skeleton = false; + if (shader_ptr) { + if (shader_ptr->canvas_item.uses_screen_texture) { + if (!state.canvas_texscreen_used) { + join = false; + } } } - Item *material_owner = ci->material_owner ? 
ci->material_owner : ci; + r_ris.shader_cache = shader_ptr; - RID material = material_owner->material; - RasterizerStorageGLES2::Material *material_ptr = storage->material_owner.getornull(material); + r_ris.canvas_last_material = material; - if (material != canvas_last_material || rebind_shader) { + r_ris.rebind_shader = false; + } - RasterizerStorageGLES2::Shader *shader_ptr = NULL; + int blend_mode = r_ris.shader_cache ? r_ris.shader_cache->canvas_item.blend_mode : RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX; + bool unshaded = r_ris.shader_cache && (r_ris.shader_cache->canvas_item.light_mode == RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_UNSHADED || (blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX && blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA)); + bool reclip = false; - if (material_ptr) { - shader_ptr = material_ptr->shader; + if (r_ris.last_blend_mode != blend_mode) { + join = false; + r_ris.last_blend_mode = blend_mode; + } - if (shader_ptr && shader_ptr->mode != VS::SHADER_CANVAS_ITEM) { - shader_ptr = NULL; // not a canvas item shader, don't use. - } - } + if ((blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX || blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA) && r_ris.item_group_light && !unshaded) { - if (shader_ptr) { - if (shader_ptr->canvas_item.uses_screen_texture) { - if (!state.canvas_texscreen_used) { - //copy if not copied before - _copy_texscreen(Rect2()); + // we cannot join lit items easily. + // it is possible, but not if they overlap, because + // a + light_blend + b + light_blend IS NOT THE SAME AS + // a + b + light_blend + join = false; + } - // blend mode will have been enabled so make sure we disable it again later on - //last_blend_mode = last_blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_DISABLED ? 
last_blend_mode : -1; - } + if (reclip) { + join = false; + } - if (storage->frame.current_rt->copy_screen_effect.color) { - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); - glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->copy_screen_effect.color); - } - } + // non rects will break the batching anyway, we don't want to record item changes, detect this + if (_detect_batch_break(p_ci)) { + join = false; + r_batch_break = true; + } - if (shader_ptr != shader_cache) { + return join; +} - if (shader_ptr->canvas_item.uses_time) { - VisualServerRaster::redraw_request(); - } +bool RasterizerCanvasGLES2::_detect_batch_break(Item *p_ci) { + int command_count = p_ci->commands.size(); - state.canvas_shader.set_custom_shader(shader_ptr->custom_code_id); - state.canvas_shader.bind(); - } + // Any item that contains commands that are default + // (i.e. not handled by software transform and the batching renderer) should not be joined. - int tc = material_ptr->textures.size(); - Pair *textures = material_ptr->textures.ptrw(); + // In order to work this out, it does a lookahead through the commands, + // which could potentially be very expensive. As such it makes sense to put a limit on this + // to some small number, which will catch nearly all cases which need joining, + // but not be overly expensive in the case of items with large numbers of commands. - ShaderLanguage::ShaderNode::Uniform::Hint *texture_hints = shader_ptr->texture_hints.ptrw(); + // It is hard to know what this number should be, empirically, + // and this has not been fully investigated. It works to join single sprite items when set to 1 or above. + // Note that there is a cost to increasing this because it has to look in advance through + // the commands. + // On the other hand joining items where possible will usually be better up to a certain + // number where the cost of software transform is higher than separate drawcalls with hardware + // transform. 
- for (int i = 0; i < tc; i++) { + // if there are more than this number of commands in the item, we + // don't allow joining (separate state changes, and hardware transform) + // This is set to quite a conservative (low) number until investigated properly. + // const int MAX_JOIN_ITEM_COMMANDS = 16; - glActiveTexture(GL_TEXTURE0 + i); + if (command_count > bdata.settings_max_join_item_commands) { + return true; + } else { + Item::Command *const *commands = p_ci->commands.ptr(); - RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(textures[i].second); + // do as many commands as possible until the vertex buffer will be full up + for (int command_num = 0; command_num < command_count; command_num++) { - if (!t) { + Item::Command *command = commands[command_num]; + CRASH_COND(!command); - switch (texture_hints[i]) { - case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK_ALBEDO: - case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: { - glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); - } break; - case ShaderLanguage::ShaderNode::Uniform::HINT_ANISO: { - glBindTexture(GL_TEXTURE_2D, storage->resources.aniso_tex); - } break; - case ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL: { - glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); - } break; - default: { - glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); - } break; - } + switch (command->type) { - continue; - } + default: { + return true; + } break; + case Item::Command::TYPE_RECT: { + } break; + } // switch - if (t->redraw_if_visible) { - VisualServerRaster::redraw_request(); - } + } // for through commands - t = t->get_ptr(); + } // else -#ifdef TOOLS_ENABLED - if (t->detect_normal && texture_hints[i] == ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL) { - t->detect_normal(t->detect_normal_ud); - } -#endif - if (t->render_target) - t->render_target->used_in_frame = true; + return false; +} - glBindTexture(t->target, t->tex_id); - } +// Legacy non-batched 
implementation for regression testing. +// Should be removed after testing phase to avoid duplicate codepaths. +void RasterizerCanvasGLES2::_canvas_render_item(Item *p_ci, RenderItemState &r_ris) { + if (r_ris.current_clip != p_ci->final_clip_owner) { + + r_ris.current_clip = p_ci->final_clip_owner; + + if (r_ris.current_clip) { + glEnable(GL_SCISSOR_TEST); + int y = storage->frame.current_rt->height - (r_ris.current_clip->final_clip_rect.position.y + r_ris.current_clip->final_clip_rect.size.y); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) + y = r_ris.current_clip->final_clip_rect.position.y; + glScissor(r_ris.current_clip->final_clip_rect.position.x, y, r_ris.current_clip->final_clip_rect.size.width, r_ris.current_clip->final_clip_rect.size.height); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + // TODO: copy back buffer + if (p_ci->copy_back_buffer) { + if (p_ci->copy_back_buffer->full) { + _copy_texscreen(Rect2()); + } else { + _copy_texscreen(p_ci->copy_back_buffer->rect); + } + } + + RasterizerStorageGLES2::Skeleton *skeleton = NULL; + + { + //skeleton handling + if (p_ci->skeleton.is_valid() && storage->skeleton_owner.owns(p_ci->skeleton)) { + skeleton = storage->skeleton_owner.get(p_ci->skeleton); + if (!skeleton->use_2d) { + skeleton = NULL; } else { - state.canvas_shader.set_custom_shader(0); - state.canvas_shader.bind(); + state.skeleton_transform = r_ris.item_group_base_transform * skeleton->base_transform_2d; + state.skeleton_transform_inverse = state.skeleton_transform.affine_inverse(); + state.skeleton_texture_size = Vector2(skeleton->size * 2, 0); } - state.canvas_shader.use_material((void *)material_ptr); + } + + bool use_skeleton = skeleton != NULL; + if (r_ris.prev_use_skeleton != use_skeleton) { + r_ris.rebind_shader = true; + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, use_skeleton); + r_ris.prev_use_skeleton = use_skeleton; + } + + if (skeleton) { + glActiveTexture(GL_TEXTURE0 + 
storage->config.max_texture_image_units - 3); + glBindTexture(GL_TEXTURE_2D, skeleton->tex_id); + state.using_skeleton = true; + } else { + state.using_skeleton = false; + } + } - shader_cache = shader_ptr; + Item *material_owner = p_ci->material_owner ? p_ci->material_owner : p_ci; - canvas_last_material = material; + RID material = material_owner->material; + RasterizerStorageGLES2::Material *material_ptr = storage->material_owner.getornull(material); - rebind_shader = false; + if (material != r_ris.canvas_last_material || r_ris.rebind_shader) { + + RasterizerStorageGLES2::Shader *shader_ptr = NULL; + + if (material_ptr) { + shader_ptr = material_ptr->shader; + + if (shader_ptr && shader_ptr->mode != VS::SHADER_CANVAS_ITEM) { + shader_ptr = NULL; // not a canvas item shader, don't use. + } } - int blend_mode = shader_cache ? shader_cache->canvas_item.blend_mode : RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX; - bool unshaded = shader_cache && (shader_cache->canvas_item.light_mode == RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_UNSHADED || (blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX && blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA)); - bool reclip = false; + if (shader_ptr) { + if (shader_ptr->canvas_item.uses_screen_texture) { + if (!state.canvas_texscreen_used) { + //copy if not copied before + _copy_texscreen(Rect2()); - if (last_blend_mode != blend_mode) { + // blend mode will have been enabled so make sure we disable it again later on + //last_blend_mode = last_blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_DISABLED ? 
last_blend_mode : -1; + } - switch (blend_mode) { + if (storage->frame.current_rt->copy_screen_effect.color) { + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); + glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->copy_screen_effect.color); + } + } - case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX: { - glBlendEquation(GL_FUNC_ADD); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } else { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); - } + if (shader_ptr != r_ris.shader_cache) { - } break; - case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_ADD: { + if (shader_ptr->canvas_item.uses_time) { + VisualServerRaster::redraw_request(); + } - glBlendEquation(GL_FUNC_ADD); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); - } else { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); - } + state.canvas_shader.set_custom_shader(shader_ptr->custom_code_id); + state.canvas_shader.bind(); + } - } break; - case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_SUB: { + int tc = material_ptr->textures.size(); + Pair *textures = material_ptr->textures.ptrw(); - glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); - } else { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); - } - } break; - case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MUL: { - glBlendEquation(GL_FUNC_ADD); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - 
glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_DST_ALPHA, GL_ZERO); - } else { - glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_ZERO, GL_ONE); - } - } break; - case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA: { - glBlendEquation(GL_FUNC_ADD); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } else { - glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + ShaderLanguage::ShaderNode::Uniform::Hint *texture_hints = shader_ptr->texture_hints.ptrw(); + + for (int i = 0; i < tc; i++) { + + glActiveTexture(GL_TEXTURE0 + i); + + RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(textures[i].second); + + if (!t) { + + switch (texture_hints[i]) { + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK_ALBEDO: + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: { + glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_ANISO: { + glBindTexture(GL_TEXTURE_2D, storage->resources.aniso_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL: { + glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); + } break; + default: { + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + } break; } - } break; + + continue; + } + + if (t->redraw_if_visible) { + VisualServerRaster::redraw_request(); + } + + t = t->get_ptr(); + +#ifdef TOOLS_ENABLED + if (t->detect_normal && texture_hints[i] == ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL) { + t->detect_normal(t->detect_normal_ud); + } +#endif + if (t->render_target) + t->render_target->used_in_frame = true; + + glBindTexture(t->target, t->tex_id); } + + } else { + state.canvas_shader.set_custom_shader(0); + state.canvas_shader.bind(); } + state.canvas_shader.use_material((void *)material_ptr); - state.uniforms.final_modulate = 
unshaded ? ci->final_modulate : Color(ci->final_modulate.r * p_modulate.r, ci->final_modulate.g * p_modulate.g, ci->final_modulate.b * p_modulate.b, ci->final_modulate.a * p_modulate.a); + r_ris.shader_cache = shader_ptr; - state.uniforms.modelview_matrix = ci->final_transform; - state.uniforms.extra_matrix = Transform2D(); + r_ris.canvas_last_material = material; - _set_uniforms(); + r_ris.rebind_shader = false; + } + + int blend_mode = r_ris.shader_cache ? r_ris.shader_cache->canvas_item.blend_mode : RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX; + bool unshaded = r_ris.shader_cache && (r_ris.shader_cache->canvas_item.light_mode == RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_UNSHADED || (blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX && blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA)); + bool reclip = false; + + if (r_ris.last_blend_mode != blend_mode) { + + switch (blend_mode) { + + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + } + + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_ADD: { + + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); + } - if (unshaded || (state.uniforms.final_modulate.a > 0.001 && (!shader_cache || shader_cache->canvas_item.light_mode != RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_LIGHT_ONLY) && !ci->light_masked)) - 
_canvas_item_render_commands(p_item_list, NULL, reclip, material_ptr); + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_SUB: { + + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); + } + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MUL: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_DST_ALPHA, GL_ZERO); + } else { + glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_ZERO, GL_ONE); + } + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + } + } break; + } + } - rebind_shader = true; // hacked in for now. + state.uniforms.final_modulate = unshaded ? 
p_ci->final_modulate : Color(p_ci->final_modulate.r * r_ris.item_group_modulate.r, p_ci->final_modulate.g * r_ris.item_group_modulate.g, p_ci->final_modulate.b * r_ris.item_group_modulate.b, p_ci->final_modulate.a * r_ris.item_group_modulate.a); - if ((blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX || blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA) && p_light && !unshaded) { + state.uniforms.modelview_matrix = p_ci->final_transform; + state.uniforms.extra_matrix = Transform2D(); - Light *light = p_light; - bool light_used = false; - VS::CanvasLightMode mode = VS::CANVAS_LIGHT_MODE_ADD; - state.uniforms.final_modulate = ci->final_modulate; // remove the canvas modulate + _set_uniforms(); - while (light) { + if (unshaded || (state.uniforms.final_modulate.a > 0.001 && (!r_ris.shader_cache || r_ris.shader_cache->canvas_item.light_mode != RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_LIGHT_ONLY) && !p_ci->light_masked)) + _canvas_item_render_commands(p_ci, NULL, reclip, material_ptr); - if (ci->light_mask & light->item_mask && p_z >= light->z_min && p_z <= light->z_max && ci->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) { + r_ris.rebind_shader = true; // hacked in for now. 
- //intersects this light + if ((blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX || blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA) && r_ris.item_group_light && !unshaded) { - if (!light_used || mode != light->mode) { + Light *light = r_ris.item_group_light; + bool light_used = false; + VS::CanvasLightMode mode = VS::CANVAS_LIGHT_MODE_ADD; + state.uniforms.final_modulate = p_ci->final_modulate; // remove the canvas modulate - mode = light->mode; + while (light) { - switch (mode) { + if (p_ci->light_mask & light->item_mask && r_ris.item_group_z >= light->z_min && r_ris.item_group_z <= light->z_max && p_ci->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) { - case VS::CANVAS_LIGHT_MODE_ADD: { - glBlendEquation(GL_FUNC_ADD); - glBlendFunc(GL_SRC_ALPHA, GL_ONE); + //intersects this light - } break; - case VS::CANVAS_LIGHT_MODE_SUB: { - glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); - glBlendFunc(GL_SRC_ALPHA, GL_ONE); - } break; - case VS::CANVAS_LIGHT_MODE_MIX: - case VS::CANVAS_LIGHT_MODE_MASK: { - glBlendEquation(GL_FUNC_ADD); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + if (!light_used || mode != light->mode) { - } break; - } - } + mode = light->mode; - if (!light_used) { + switch (mode) { - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, true); - light_used = true; - } + case VS::CANVAS_LIGHT_MODE_ADD: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + + } break; + case VS::CANVAS_LIGHT_MODE_SUB: { + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + } break; + case VS::CANVAS_LIGHT_MODE_MIX: + case VS::CANVAS_LIGHT_MODE_MASK: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - bool has_shadow = light->shadow_buffer.is_valid() && ci->light_mask & light->item_shadow_mask; - - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, has_shadow); - if 
(has_shadow) { - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_USE_GRADIENT, light->shadow_gradient_length > 0); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_NONE); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF3); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF5); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF7); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF9); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF13); + } break; } + } - state.canvas_shader.bind(); - state.using_light = light; - state.using_shadow = has_shadow; + if (!light_used) { - //always re-set uniforms, since light parameters changed - _set_uniforms(); - state.canvas_shader.use_material((void *)material_ptr); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, true); + light_used = true; + } - glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); - RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(light->texture); - if (!t) { - glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); - } else { - t = t->get_ptr(); + bool has_shadow = light->shadow_buffer.is_valid() && p_ci->light_mask & light->item_shadow_mask; + + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, has_shadow); + if (has_shadow) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_USE_GRADIENT, light->shadow_gradient_length > 0); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, light->shadow_filter == 
VS::CANVAS_LIGHT_FILTER_NONE); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF3); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF5); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF7); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF9); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF13); + } - glBindTexture(t->target, t->tex_id); - } + state.canvas_shader.bind(); + state.using_light = light; + state.using_shadow = has_shadow; + + //always re-set uniforms, since light parameters changed + _set_uniforms(); + state.canvas_shader.use_material((void *)material_ptr); - glActiveTexture(GL_TEXTURE0); - _canvas_item_render_commands(p_item_list, NULL, reclip, material_ptr); //redraw using light + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); + RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(light->texture); + if (!t) { + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + } else { + t = t->get_ptr(); - state.using_light = NULL; + glBindTexture(t->target, t->tex_id); } - light = light->next_ptr; + glActiveTexture(GL_TEXTURE0); + _canvas_item_render_commands(p_ci, NULL, reclip, material_ptr); //redraw using light + + state.using_light = NULL; } - if (light_used) { + light = light->next_ptr; + } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, false); - 
state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, false); - state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, false); + if (light_used) { - state.canvas_shader.bind(); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, false); - last_blend_mode = -1; + state.canvas_shader.bind(); - /* - //this is set again, so it should not be needed anyway? - state.canvas_item_modulate = unshaded ? ci->final_modulate : Color( - ci->final_modulate.r * p_modulate.r, - ci->final_modulate.g * p_modulate.g, - ci->final_modulate.b * p_modulate.b, - ci->final_modulate.a * p_modulate.a ); + r_ris.last_blend_mode = -1; + /* + //this is set again, so it should not be needed anyway? + state.canvas_item_modulate = unshaded ? 
ci->final_modulate : Color( + ci->final_modulate.r * p_modulate.r, + ci->final_modulate.g * p_modulate.g, + ci->final_modulate.b * p_modulate.b, + ci->final_modulate.a * p_modulate.a ); - state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX,state.final_transform); - state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX,Transform2D()); - state.canvas_shader.set_uniform(CanvasShaderGLES2::FINAL_MODULATE,state.canvas_item_modulate); - glBlendEquation(GL_FUNC_ADD); + state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX,Transform2D()); + state.canvas_shader.set_uniform(CanvasShaderGLES2::FINAL_MODULATE,state.canvas_item_modulate); - if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } else { - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - } + glBlendEquation(GL_FUNC_ADD); - //@TODO RESET canvas_blend_mode - */ + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); } + + //@TODO RESET canvas_blend_mode + */ } + } + + if (reclip) { + glEnable(GL_SCISSOR_TEST); + int y = storage->frame.current_rt->height - (r_ris.current_clip->final_clip_rect.position.y + r_ris.current_clip->final_clip_rect.size.y); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) + y = r_ris.current_clip->final_clip_rect.position.y; + glScissor(r_ris.current_clip->final_clip_rect.position.x, y, r_ris.current_clip->final_clip_rect.size.width, r_ris.current_clip->final_clip_rect.size.height); + } +} + +void RasterizerCanvasGLES2::render_joined_item(const BItemJoined &p_bij, RenderItemState &r_ris) { + + // all the joined items 
will share the same state with the first item + Item *ci = bdata.item_refs[p_bij.first_item_ref].item; + + if (r_ris.current_clip != ci->final_clip_owner) { - if (reclip) { + r_ris.current_clip = ci->final_clip_owner; + + if (r_ris.current_clip) { glEnable(GL_SCISSOR_TEST); - int y = storage->frame.current_rt->height - (current_clip->final_clip_rect.position.y + current_clip->final_clip_rect.size.y); + int y = storage->frame.current_rt->height - (r_ris.current_clip->final_clip_rect.position.y + r_ris.current_clip->final_clip_rect.size.y); if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) - y = current_clip->final_clip_rect.position.y; - glScissor(current_clip->final_clip_rect.position.x, y, current_clip->final_clip_rect.size.width, current_clip->final_clip_rect.size.height); + y = r_ris.current_clip->final_clip_rect.position.y; + glScissor(r_ris.current_clip->final_clip_rect.position.x, y, r_ris.current_clip->final_clip_rect.size.width, r_ris.current_clip->final_clip_rect.size.height); + } else { + glDisable(GL_SCISSOR_TEST); } - - p_item_list = p_item_list->next; } - if (current_clip) { - glDisable(GL_SCISSOR_TEST); + // TODO: copy back buffer + + if (ci->copy_back_buffer) { + if (ci->copy_back_buffer->full) { + _copy_texscreen(Rect2()); + } else { + _copy_texscreen(ci->copy_back_buffer->rect); + } } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, false); -} + RasterizerStorageGLES2::Skeleton *skeleton = NULL; -void RasterizerCanvasGLES2::canvas_debug_viewport_shadows(Light *p_lights_with_shadow) { -} + { + //skeleton handling + if (ci->skeleton.is_valid() && storage->skeleton_owner.owns(ci->skeleton)) { + skeleton = storage->skeleton_owner.get(ci->skeleton); + if (!skeleton->use_2d) { + skeleton = NULL; + } else { + state.skeleton_transform = r_ris.item_group_base_transform * skeleton->base_transform_2d; + state.skeleton_transform_inverse = state.skeleton_transform.affine_inverse(); + state.skeleton_texture_size 
= Vector2(skeleton->size * 2, 0); + } + } + + bool use_skeleton = skeleton != NULL; + if (r_ris.prev_use_skeleton != use_skeleton) { + r_ris.rebind_shader = true; + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SKELETON, use_skeleton); + r_ris.prev_use_skeleton = use_skeleton; + } -void RasterizerCanvasGLES2::canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, CameraMatrix *p_xform_cache) { + if (skeleton) { + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 3); + glBindTexture(GL_TEXTURE_2D, skeleton->tex_id); + state.using_skeleton = true; + } else { + state.using_skeleton = false; + } + } - RasterizerStorageGLES2::CanvasLightShadow *cls = storage->canvas_light_shadow_owner.get(p_buffer); - ERR_FAIL_COND(!cls); + Item *material_owner = ci->material_owner ? ci->material_owner : ci; - glDisable(GL_BLEND); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_DITHER); - glDisable(GL_CULL_FACE); - glDepthFunc(GL_LEQUAL); - glEnable(GL_DEPTH_TEST); - glDepthMask(true); + RID material = material_owner->material; + RasterizerStorageGLES2::Material *material_ptr = storage->material_owner.getornull(material); - glBindFramebuffer(GL_FRAMEBUFFER, cls->fbo); + if (material != r_ris.canvas_last_material || r_ris.rebind_shader) { - state.canvas_shadow_shader.set_conditional(CanvasShadowShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows); - state.canvas_shadow_shader.bind(); + RasterizerStorageGLES2::Shader *shader_ptr = NULL; - glViewport(0, 0, cls->size, cls->height); - glClearDepth(1.0f); - glClearColor(1, 1, 1, 1); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + if (material_ptr) { + shader_ptr = material_ptr->shader; - VS::CanvasOccluderPolygonCullMode cull = VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED; + if (shader_ptr && shader_ptr->mode != VS::SHADER_CANVAS_ITEM) { + shader_ptr = NULL; // not a canvas item 
shader, don't use. + } + } - for (int i = 0; i < 4; i++) { + if (shader_ptr) { + if (shader_ptr->canvas_item.uses_screen_texture) { + if (!state.canvas_texscreen_used) { + //copy if not copied before + _copy_texscreen(Rect2()); - //make sure it remains orthogonal, makes easy to read angle later + // blend mode will have been enabled so make sure we disable it again later on + //last_blend_mode = last_blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_DISABLED ? last_blend_mode : -1; + } - Transform light; - light.origin[0] = p_light_xform[2][0]; - light.origin[1] = p_light_xform[2][1]; - light.basis[0][0] = p_light_xform[0][0]; - light.basis[0][1] = p_light_xform[1][0]; - light.basis[1][0] = p_light_xform[0][1]; - light.basis[1][1] = p_light_xform[1][1]; + if (storage->frame.current_rt->copy_screen_effect.color) { + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); + glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->copy_screen_effect.color); + } + } - //light.basis.scale(Vector3(to_light.elements[0].length(),to_light.elements[1].length(),1)); + if (shader_ptr != r_ris.shader_cache) { - //p_near=1; - CameraMatrix projection; - { - real_t fov = 90; - real_t nearp = p_near; - real_t farp = p_far; - real_t aspect = 1.0; + if (shader_ptr->canvas_item.uses_time) { + VisualServerRaster::redraw_request(); + } - real_t ymax = nearp * Math::tan(Math::deg2rad(fov * 0.5)); - real_t ymin = -ymax; - real_t xmin = ymin * aspect; - real_t xmax = ymax * aspect; + state.canvas_shader.set_custom_shader(shader_ptr->custom_code_id); + state.canvas_shader.bind(); + } - projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp); - } + int tc = material_ptr->textures.size(); + Pair *textures = material_ptr->textures.ptrw(); - Vector3 cam_target = Basis(Vector3(0, 0, Math_PI * 2 * (i / 4.0))).xform(Vector3(0, 1, 0)); - projection = projection * CameraMatrix(Transform().looking_at(cam_target, Vector3(0, 0, -1)).affine_inverse()); + 
ShaderLanguage::ShaderNode::Uniform::Hint *texture_hints = shader_ptr->texture_hints.ptrw(); - state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::PROJECTION_MATRIX, projection); - state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::LIGHT_MATRIX, light); - state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::DISTANCE_NORM, 1.0 / p_far); + for (int i = 0; i < tc; i++) { - if (i == 0) - *p_xform_cache = projection; + glActiveTexture(GL_TEXTURE0 + i); - glViewport(0, (cls->height / 4) * i, cls->size, cls->height / 4); + RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(textures[i].second); - LightOccluderInstance *instance = p_occluders; + if (!t) { - while (instance) { + switch (texture_hints[i]) { + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK_ALBEDO: + case ShaderLanguage::ShaderNode::Uniform::HINT_BLACK: { + glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_ANISO: { + glBindTexture(GL_TEXTURE_2D, storage->resources.aniso_tex); + } break; + case ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL: { + glBindTexture(GL_TEXTURE_2D, storage->resources.normal_tex); + } break; + default: { + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + } break; + } - RasterizerStorageGLES2::CanvasOccluder *cc = storage->canvas_occluder_owner.getornull(instance->polygon_buffer); - if (!cc || cc->len == 0 || !(p_light_mask & instance->light_mask)) { + continue; + } - instance = instance->next; - continue; - } + if (t->redraw_if_visible) { + VisualServerRaster::redraw_request(); + } - state.canvas_shadow_shader.set_uniform(CanvasShadowShaderGLES2::WORLD_MATRIX, instance->xform_cache); + t = t->get_ptr(); - VS::CanvasOccluderPolygonCullMode transformed_cull_cache = instance->cull_cache; +#ifdef TOOLS_ENABLED + if (t->detect_normal && texture_hints[i] == ShaderLanguage::ShaderNode::Uniform::HINT_NORMAL) { + t->detect_normal(t->detect_normal_ud); + 
} +#endif + if (t->render_target) + t->render_target->used_in_frame = true; - if (transformed_cull_cache != VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED && - (p_light_xform.basis_determinant() * instance->xform_cache.basis_determinant()) < 0) { - transformed_cull_cache = - transformed_cull_cache == VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE ? - VS::CANVAS_OCCLUDER_POLYGON_CULL_COUNTER_CLOCKWISE : - VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE; + glBindTexture(t->target, t->tex_id); } - if (cull != transformed_cull_cache) { + } else { + state.canvas_shader.set_custom_shader(0); + state.canvas_shader.bind(); + } + state.canvas_shader.use_material((void *)material_ptr); - cull = transformed_cull_cache; - switch (cull) { - case VS::CANVAS_OCCLUDER_POLYGON_CULL_DISABLED: { + r_ris.shader_cache = shader_ptr; - glDisable(GL_CULL_FACE); + r_ris.canvas_last_material = material; + + r_ris.rebind_shader = false; + } - } break; - case VS::CANVAS_OCCLUDER_POLYGON_CULL_CLOCKWISE: { + int blend_mode = r_ris.shader_cache ? 
r_ris.shader_cache->canvas_item.blend_mode : RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX; + bool unshaded = r_ris.shader_cache && (r_ris.shader_cache->canvas_item.light_mode == RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_UNSHADED || (blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX && blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA)); + bool reclip = false; - glEnable(GL_CULL_FACE); - glCullFace(GL_FRONT); - } break; - case VS::CANVAS_OCCLUDER_POLYGON_CULL_COUNTER_CLOCKWISE: { + if (r_ris.last_blend_mode != blend_mode) { - glEnable(GL_CULL_FACE); - glCullFace(GL_BACK); + switch (blend_mode) { - } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); } - } - glBindBuffer(GL_ARRAY_BUFFER, cc->vertex_id); - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glVertexAttribPointer(VS::ARRAY_VERTEX, 3, GL_FLOAT, false, 0, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cc->index_id); + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_ADD: { - glDrawElements(GL_TRIANGLES, cc->len * 3, GL_UNSIGNED_SHORT, 0); + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); + } + + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_SUB: { - instance = instance->next; + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + if (storage->frame.current_rt && 
storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE); + } else { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ZERO, GL_ONE); + } + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MUL: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_DST_ALPHA, GL_ZERO); + } else { + glBlendFuncSeparate(GL_DST_COLOR, GL_ZERO, GL_ZERO, GL_ONE); + } + } break; + case RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA: { + glBlendEquation(GL_FUNC_ADD); + if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFuncSeparate(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ZERO, GL_ONE); + } + } break; } } - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -} -void RasterizerCanvasGLES2::reset_canvas() { + state.uniforms.final_modulate = unshaded ? 
ci->final_modulate : Color(ci->final_modulate.r * r_ris.item_group_modulate.r, ci->final_modulate.g * r_ris.item_group_modulate.g, ci->final_modulate.b * r_ris.item_group_modulate.b, ci->final_modulate.a * r_ris.item_group_modulate.a); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_DITHER); - glEnable(GL_BLEND); + if (!p_bij.use_hardware_transform()) + state.uniforms.modelview_matrix = Transform2D(); + else + state.uniforms.modelview_matrix = ci->final_transform; + state.uniforms.extra_matrix = Transform2D(); - if (storage->frame.current_rt && storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } else { - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - } + _set_uniforms(); - // bind the back buffer to a texture so shaders can use it. - // It should probably use texture unit -3 (as GLES2 does as well) but currently that's buggy. - // keeping this for now as there's nothing else that uses texture unit 2 - // TODO ^ - if (storage->frame.current_rt) { - // glActiveTexture(GL_TEXTURE0 + 2); - // glBindTexture(GL_TEXTURE_2D, storage->frame.current_rt->copy_screen_effect.color); - } + if (unshaded || (state.uniforms.final_modulate.a > 0.001 && (!r_ris.shader_cache || r_ris.shader_cache->canvas_item.light_mode != RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_LIGHT_ONLY) && !ci->light_masked)) + render_joined_item_commands(p_bij, NULL, reclip, material_ptr); - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -} + r_ris.rebind_shader = true; // hacked in for now. 
-void RasterizerCanvasGLES2::_bind_quad_buffer() { - glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices); - glEnableVertexAttribArray(VS::ARRAY_VERTEX); - glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, 0, NULL); -} -void RasterizerCanvasGLES2::draw_generic_textured_rect(const Rect2 &p_rect, const Rect2 &p_src) { + if ((blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX || blend_mode == RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA) && r_ris.item_group_light && !unshaded) { - state.canvas_shader.set_uniform(CanvasShaderGLES2::DST_RECT, Color(p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y)); - state.canvas_shader.set_uniform(CanvasShaderGLES2::SRC_RECT, Color(p_src.position.x, p_src.position.y, p_src.size.x, p_src.size.y)); + Light *light = r_ris.item_group_light; + bool light_used = false; + VS::CanvasLightMode mode = VS::CANVAS_LIGHT_MODE_ADD; + state.uniforms.final_modulate = ci->final_modulate; // remove the canvas modulate - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); -} + while (light) { -void RasterizerCanvasGLES2::draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample) { - Vector2 half_size; - if (storage->frame.current_rt) { - half_size = Vector2(storage->frame.current_rt->width, storage->frame.current_rt->height); - } else { - half_size = OS::get_singleton()->get_window_size(); - } - half_size *= 0.5; - Vector2 offset((p_rect.position.x - half_size.x) / half_size.x, (p_rect.position.y - half_size.y) / half_size.y); - Vector2 scale(p_rect.size.x / half_size.x, p_rect.size.y / half_size.y); + // use the bounding rect of the joined items, NOT only the bounding rect of the first item. 
+ // note this is a cost of batching, the light culling will be less effective + if (ci->light_mask & light->item_mask && r_ris.item_group_z >= light->z_min && r_ris.item_group_z <= light->z_max && p_bij.bounding_rect.intersects_transformed(light->xform_cache, light->rect_cache)) { - float aspect_ratio = p_rect.size.x / p_rect.size.y; + //intersects this light - // setup our lens shader - state.lens_shader.bind(); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::OFFSET, offset); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::SCALE, scale); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::K1, p_k1); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::K2, p_k2); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::EYE_CENTER, p_eye_center); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::UPSCALE, p_oversample); - state.lens_shader.set_uniform(LensDistortedShaderGLES2::ASPECT_RATIO, aspect_ratio); + if (!light_used || mode != light->mode) { - // bind our quad buffer - _bind_quad_buffer(); + mode = light->mode; - // and draw - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + switch (mode) { - // and cleanup - glBindBuffer(GL_ARRAY_BUFFER, 0); + case VS::CANVAS_LIGHT_MODE_ADD: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); - for (int i = 0; i < VS::ARRAY_MAX; i++) { - glDisableVertexAttribArray(i); - } -} + } break; + case VS::CANVAS_LIGHT_MODE_SUB: { + glBlendEquation(GL_FUNC_REVERSE_SUBTRACT); + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + } break; + case VS::CANVAS_LIGHT_MODE_MIX: + case VS::CANVAS_LIGHT_MODE_MASK: { + glBlendEquation(GL_FUNC_ADD); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); -void RasterizerCanvasGLES2::draw_window_margins(int *black_margin, RID *black_image) { + } break; + } + } - Vector2 window_size = OS::get_singleton()->get_window_size(); - int window_h = window_size.height; - int window_w = window_size.width; + if (!light_used) { - glBindFramebuffer(GL_FRAMEBUFFER, 
storage->system_fbo); - glViewport(0, 0, window_size.width, window_size.height); - canvas_begin(); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, true); + light_used = true; + } - if (black_image[MARGIN_LEFT].is_valid()) { - _bind_canvas_texture(black_image[MARGIN_LEFT], RID()); - Size2 sz(storage->texture_get_width(black_image[MARGIN_LEFT]), storage->texture_get_height(black_image[MARGIN_LEFT])); - draw_generic_textured_rect(Rect2(0, 0, black_margin[MARGIN_LEFT], window_h), Rect2(0, 0, sz.x, sz.y)); - } else if (black_margin[MARGIN_LEFT]) { - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + bool has_shadow = light->shadow_buffer.is_valid() && ci->light_mask & light->item_shadow_mask; + + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, has_shadow); + if (has_shadow) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_USE_GRADIENT, light->shadow_gradient_length > 0); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_NONE); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF3); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF5); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF7); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF9); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, light->shadow_filter == VS::CANVAS_LIGHT_FILTER_PCF13); + } - draw_generic_textured_rect(Rect2(0, 0, black_margin[MARGIN_LEFT], window_h), Rect2(0, 0, 1, 1)); - } + state.canvas_shader.bind(); + state.using_light = light; + state.using_shadow = has_shadow; - if 
(black_image[MARGIN_RIGHT].is_valid()) { - _bind_canvas_texture(black_image[MARGIN_RIGHT], RID()); - Size2 sz(storage->texture_get_width(black_image[MARGIN_RIGHT]), storage->texture_get_height(black_image[MARGIN_RIGHT])); - draw_generic_textured_rect(Rect2(window_w - black_margin[MARGIN_RIGHT], 0, black_margin[MARGIN_RIGHT], window_h), Rect2(0, 0, sz.x, sz.y)); - } else if (black_margin[MARGIN_RIGHT]) { - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + //always re-set uniforms, since light parameters changed + _set_uniforms(); + state.canvas_shader.use_material((void *)material_ptr); - draw_generic_textured_rect(Rect2(window_w - black_margin[MARGIN_RIGHT], 0, black_margin[MARGIN_RIGHT], window_h), Rect2(0, 0, 1, 1)); - } + glActiveTexture(GL_TEXTURE0 + storage->config.max_texture_image_units - 4); + RasterizerStorageGLES2::Texture *t = storage->texture_owner.getornull(light->texture); + if (!t) { + glBindTexture(GL_TEXTURE_2D, storage->resources.white_tex); + } else { + t = t->get_ptr(); - if (black_image[MARGIN_TOP].is_valid()) { - _bind_canvas_texture(black_image[MARGIN_TOP], RID()); + glBindTexture(t->target, t->tex_id); + } - Size2 sz(storage->texture_get_width(black_image[MARGIN_TOP]), storage->texture_get_height(black_image[MARGIN_TOP])); - draw_generic_textured_rect(Rect2(0, 0, window_w, black_margin[MARGIN_TOP]), Rect2(0, 0, sz.x, sz.y)); + glActiveTexture(GL_TEXTURE0); - } else if (black_margin[MARGIN_TOP]) { - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); + // redraw using light. + // if there is no clip item, we can consider scissoring to the intersection area between the light and the item + // this can greatly reduce fill rate .. 
+ // at the cost of glScissor commands, so is optional + if (!bdata.settings_scissor_lights || r_ris.current_clip) { + render_joined_item_commands(p_bij, NULL, reclip, material_ptr); + } else { + bool scissor = _light_scissor_begin(p_bij.bounding_rect, light->xform_cache, light->rect_cache); + render_joined_item_commands(p_bij, NULL, reclip, material_ptr); + if (scissor) { + glDisable(GL_SCISSOR_TEST); + } + } - draw_generic_textured_rect(Rect2(0, 0, window_w, black_margin[MARGIN_TOP]), Rect2(0, 0, 1, 1)); - } + state.using_light = NULL; + } + + light = light->next_ptr; + } + + if (light_used) { - if (black_image[MARGIN_BOTTOM].is_valid()) { + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHTING, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_SHADOWS, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_NEAREST, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF3, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF5, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF7, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF9, false); + state.canvas_shader.set_conditional(CanvasShaderGLES2::SHADOW_FILTER_PCF13, false); - _bind_canvas_texture(black_image[MARGIN_BOTTOM], RID()); + state.canvas_shader.bind(); - Size2 sz(storage->texture_get_width(black_image[MARGIN_BOTTOM]), storage->texture_get_height(black_image[MARGIN_BOTTOM])); - draw_generic_textured_rect(Rect2(0, window_h - black_margin[MARGIN_BOTTOM], window_w, black_margin[MARGIN_BOTTOM]), Rect2(0, 0, sz.x, sz.y)); + r_ris.last_blend_mode = -1; - } else if (black_margin[MARGIN_BOTTOM]) { + /* + //this is set again, so it should not be needed anyway? + state.canvas_item_modulate = unshaded ? 
ci->final_modulate : Color( + ci->final_modulate.r * p_modulate.r, + ci->final_modulate.g * p_modulate.g, + ci->final_modulate.b * p_modulate.b, + ci->final_modulate.a * p_modulate.a ); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, storage->resources.black_tex); - draw_generic_textured_rect(Rect2(0, window_h - black_margin[MARGIN_BOTTOM], window_w, black_margin[MARGIN_BOTTOM]), Rect2(0, 0, 1, 1)); + state.canvas_shader.set_uniform(CanvasShaderGLES2::MODELVIEW_MATRIX,state.final_transform); + state.canvas_shader.set_uniform(CanvasShaderGLES2::EXTRA_MATRIX,Transform2D()); + state.canvas_shader.set_uniform(CanvasShaderGLES2::FINAL_MODULATE,state.canvas_item_modulate); + + glBlendEquation(GL_FUNC_ADD); + + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_TRANSPARENT]) { + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } else { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + + //@TODO RESET canvas_blend_mode + */ + } } - canvas_end(); + if (reclip) { + glEnable(GL_SCISSOR_TEST); + int y = storage->frame.current_rt->height - (r_ris.current_clip->final_clip_rect.position.y + r_ris.current_clip->final_clip_rect.size.y); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) + y = r_ris.current_clip->final_clip_rect.position.y; + glScissor(r_ris.current_clip->final_clip_rect.position.x, y, r_ris.current_clip->final_clip_rect.size.width, r_ris.current_clip->final_clip_rect.size.height); + } } -void RasterizerCanvasGLES2::initialize() { +bool RasterizerCanvasGLES2::_light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const { + // transform light to world space (note this is done in the earlier intersection test, so could + // be made more efficient) + Vector2 pts[4] = { + p_light_xform.xform(p_light_rect.position), + p_light_xform.xform(Vector2(p_light_rect.position.x + 
p_light_rect.size.x, p_light_rect.position.y)), + p_light_xform.xform(Vector2(p_light_rect.position.x, p_light_rect.position.y + p_light_rect.size.y)), + p_light_xform.xform(Vector2(p_light_rect.position.x + p_light_rect.size.x, p_light_rect.position.y + p_light_rect.size.y)), + }; - // quad buffer - { - glGenBuffers(1, &data.canvas_quad_vertices); - glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices); + // calculate the light bound rect in world space + Rect2 lrect(pts[0].x, pts[0].y, 0, 0); + for (int n = 1; n < 4; n++) { + lrect.expand_to(pts[n]); + } - const float qv[8] = { - 0, 0, - 0, 1, - 1, 1, - 1, 0 - }; + // intersection between the 2 rects + // they should probably always intersect, because of earlier check, but just in case... + if (!p_item_rect.intersects(lrect)) + return false; - glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 8, qv, GL_STATIC_DRAW); + // note this does almost the same as Rect2.clip but slightly more efficient for our use case + r_cliprect.position.x = MAX(p_item_rect.position.x, lrect.position.x); + r_cliprect.position.y = MAX(p_item_rect.position.y, lrect.position.y); - glBindBuffer(GL_ARRAY_BUFFER, 0); - } + Point2 item_rect_end = p_item_rect.position + p_item_rect.size; + Point2 lrect_end = lrect.position + lrect.size; - // polygon buffer - { - uint32_t poly_size = GLOBAL_DEF("rendering/limits/buffers/canvas_polygon_buffer_size_kb", 128); - ProjectSettings::get_singleton()->set_custom_property_info("rendering/limits/buffers/canvas_polygon_buffer_size_kb", PropertyInfo(Variant::INT, "rendering/limits/buffers/canvas_polygon_buffer_size_kb", PROPERTY_HINT_RANGE, "0,256,1,or_greater")); - poly_size *= 1024; - poly_size = MAX(poly_size, (2 + 2 + 4) * 4 * sizeof(float)); - glGenBuffers(1, &data.polygon_buffer); - glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); - glBufferData(GL_ARRAY_BUFFER, poly_size, NULL, GL_DYNAMIC_DRAW); + r_cliprect.size.x = MIN(item_rect_end.x, lrect_end.x) - r_cliprect.position.x; + r_cliprect.size.y = 
MIN(item_rect_end.y, lrect_end.y) - r_cliprect.position.y; - data.polygon_buffer_size = poly_size; + return true; +} - glBindBuffer(GL_ARRAY_BUFFER, 0); +bool RasterizerCanvasGLES2::_light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const { - uint32_t index_size = GLOBAL_DEF("rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", 128); - ProjectSettings::get_singleton()->set_custom_property_info("rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", PropertyInfo(Variant::INT, "rendering/limits/buffers/canvas_polygon_index_buffer_size_kb", PROPERTY_HINT_RANGE, "0,256,1,or_greater")); - index_size *= 1024; // kb - glGenBuffers(1, &data.polygon_index_buffer); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, index_size, NULL, GL_DYNAMIC_DRAW); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + float area_item = p_item_rect.size.x * p_item_rect.size.y; // double check these are always positive - data.polygon_index_buffer_size = index_size; + // quick reject .. the area of pixels saved can never be more than the area of the item + if (area_item < bdata.scissor_threshold_area) { + return false; } - // ninepatch buffers - { - // array buffer - glGenBuffers(1, &data.ninepatch_vertices); - glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); - - glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, NULL, GL_DYNAMIC_DRAW); + Rect2 cliprect; + if (!_light_find_intersection(p_item_rect, p_light_xform, p_light_rect, cliprect)) { + // should not really occur .. 
but just in case + cliprect = Rect2(0, 0, 0, 0); + } else { + // some conditions not to scissor + // determine the area (fill rate) that will be saved + float area_cliprect = cliprect.size.x * cliprect.size.y; + float area_saved = area_item - area_cliprect; + + // if area saved is too small, don't scissor + if (area_saved < bdata.scissor_threshold_area) { + return false; + } + } - glBindBuffer(GL_ARRAY_BUFFER, 0); + glEnable(GL_SCISSOR_TEST); + int y = storage->frame.current_rt->height - (cliprect.position.y + cliprect.size.y); + if (storage->frame.current_rt->flags[RasterizerStorage::RENDER_TARGET_VFLIP]) + y = cliprect.position.y; + glScissor(cliprect.position.x, y, cliprect.size.width, cliprect.size.height); - // element buffer - glGenBuffers(1, &data.ninepatch_elements); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); + return true; +} -#define _EIDX(y, x) (y * 4 + x) - uint8_t elems[3 * 2 * 9] = { +void RasterizerCanvasGLES2::_calculate_scissor_threshold_area() { + if (!bdata.settings_scissor_lights) { + return; + } - // first row + // scissor area threshold is 0.0 to 1.0 in the settings for ease of use. + // we need to translate to an absolute area to determine quickly whether + // to scissor. 
+ if (bdata.settings_scissor_threshold < 0.0001f) { + bdata.scissor_threshold_area = -1.0f; // will always pass + } else { + // in pixels + int w = storage->frame.current_rt->width; + int h = storage->frame.current_rt->height; - _EIDX(0, 0), _EIDX(0, 1), _EIDX(1, 1), - _EIDX(1, 1), _EIDX(1, 0), _EIDX(0, 0), + int screen_area = w * h; - _EIDX(0, 1), _EIDX(0, 2), _EIDX(1, 2), - _EIDX(1, 2), _EIDX(1, 1), _EIDX(0, 1), + bdata.scissor_threshold_area = bdata.settings_scissor_threshold * screen_area; + } +} - _EIDX(0, 2), _EIDX(0, 3), _EIDX(1, 3), - _EIDX(1, 3), _EIDX(1, 2), _EIDX(0, 2), +void RasterizerCanvasGLES2::initialize() { + RasterizerCanvasBaseGLES2::initialize(); - // second row + bdata.settings_use_batching = GLOBAL_GET("rendering/gles2/batching/use_batching"); + bdata.settings_max_join_item_commands = GLOBAL_GET("rendering/gles2/batching/max_join_item_commands"); + bdata.settings_colored_vertex_format_threshold = GLOBAL_GET("rendering/gles2/batching/colored_vertex_format_threshold"); - _EIDX(1, 0), _EIDX(1, 1), _EIDX(2, 1), - _EIDX(2, 1), _EIDX(2, 0), _EIDX(1, 0), + // we can use the threshold to determine whether to turn scissoring off or on + bdata.settings_scissor_threshold = GLOBAL_GET("rendering/gles2/batching/light_scissor_area_threshold"); + if (bdata.settings_scissor_threshold > 0.999f) { + bdata.settings_scissor_lights = false; + } else { + bdata.settings_scissor_lights = true; + } - // the center one would be here, but we'll put it at the end - // so it's easier to disable the center and be able to use - // one draw call for both + // The sweet spot on my desktop for cache is actually smaller than the max, and this + // is the default. This saves memory too so we will use it for now, needs testing to see whether this varies according + // to device / platform. 
+ bdata.settings_batch_buffer_num_verts = GLOBAL_GET("rendering/gles2/batching/batch_buffer_size"); - _EIDX(1, 2), _EIDX(1, 3), _EIDX(2, 3), - _EIDX(2, 3), _EIDX(2, 2), _EIDX(1, 2), + // override the use_batching setting in the editor + // (note that if the editor can't start, you can't change the use_batching project setting!) + if (Engine::get_singleton()->is_editor_hint()) { + bool use_in_editor = GLOBAL_GET("rendering/gles2/debug/use_batching_in_editor"); + bdata.settings_use_batching = use_in_editor; + } - // third row + // For debugging, if flash is set in project settings, it will flash on alternate frames + // between the non-batched renderer and the batched renderer, + // in order to find regressions. + // This should not be used except during development. + // make a note of the original choice in case we are flashing on and off the batching + bdata.settings_use_batching_original_choice = bdata.settings_use_batching; + bdata.settings_flash_batching = GLOBAL_GET("rendering/gles2/debug/flash_batching"); + if (!bdata.settings_use_batching) { + // no flash when batching turned off + bdata.settings_flash_batching = false; + } - _EIDX(2, 0), _EIDX(2, 1), _EIDX(3, 1), - _EIDX(3, 1), _EIDX(3, 0), _EIDX(2, 0), + // the maximum num quads in a batch is limited by GLES2. We can have only 16 bit indices, + // which means we can address a vertex buffer of max size 65535. 4 vertices are needed per quad. + + // Note this determines the memory use by the vertex buffer vector. 
max quads (65536/4)-1 + // but can be reduced to save memory if really required (will result in more batches though) + const int max_possible_quads = (65536 / 4) - 1; + const int min_possible_quads = 8; // some reasonable small value + + // value from project settings + int max_quads = bdata.settings_batch_buffer_num_verts / 4; + + // sanity checks + max_quads = CLAMP(max_quads, min_possible_quads, max_possible_quads); + bdata.settings_max_join_item_commands = CLAMP(bdata.settings_max_join_item_commands, 0, 65535); + bdata.settings_colored_vertex_format_threshold = CLAMP(bdata.settings_colored_vertex_format_threshold, 0.0f, 1.0f); + bdata.settings_scissor_threshold = CLAMP(bdata.settings_scissor_threshold, 0.0f, 1.0f); + + // for debug purposes, output a string with the batching options + String batching_options_string = "OpenGL ES 2.0 Batching: "; + if (bdata.settings_use_batching) { + batching_options_string += "ON\n\tOPTIONS\n"; + batching_options_string += "\tmax_join_item_commands " + itos(bdata.settings_max_join_item_commands) + "\n"; + batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n"; + batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n"; + batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n"; + batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)); + } else { + batching_options_string += "OFF"; + } + print_line(batching_options_string); + + // special case, for colored vertex format threshold. 
+ // as the comparison is >=, we want to be able to totally turn on or off + // conversion to colored vertex format at the extremes, so we will force + // 1.0 to be just above 1.0 + if (bdata.settings_colored_vertex_format_threshold > 0.995f) { + bdata.settings_colored_vertex_format_threshold = 1.01f; + } - _EIDX(2, 1), _EIDX(2, 2), _EIDX(3, 2), - _EIDX(3, 2), _EIDX(3, 1), _EIDX(2, 1), + // save memory when batching off + if (!bdata.settings_use_batching) { + max_quads = 0; + } - _EIDX(2, 2), _EIDX(2, 3), _EIDX(3, 3), - _EIDX(3, 3), _EIDX(3, 2), _EIDX(2, 2), + uint32_t sizeof_batch_vert = sizeof(BatchVertex); - // center field + bdata.max_quads = max_quads; - _EIDX(1, 1), _EIDX(1, 2), _EIDX(2, 2), - _EIDX(2, 2), _EIDX(2, 1), _EIDX(1, 1) - }; -#undef _EIDX + // 4 verts per quad + bdata.vertex_buffer_size_units = max_quads * 4; - glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(elems), elems, GL_STATIC_DRAW); + // the index buffer can be longer than 65535, only the indices need to be within this range + bdata.index_buffer_size_units = max_quads * 6; - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - } + // this comes out at approx 64K for non-colored vertex buffer, and 128K for colored vertex buffer + bdata.vertex_buffer_size_bytes = bdata.vertex_buffer_size_units * sizeof_batch_vert; + bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds - state.canvas_shadow_shader.init(); + // create equal number of norma and colored verts (as the normal may need to be translated to colored) + bdata.vertices.create(bdata.vertex_buffer_size_units); // 512k + bdata.vertices_colored.create(bdata.vertices.max_size()); // 1024k - state.canvas_shader.init(); + // num batches will be auto increased dynamically if required + bdata.batches.create(1024); + bdata.batches_temp.create(bdata.batches.max_size()); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, 
storage->config.use_rgba_2d_shadows); + // batch textures can also be increased dynamically + bdata.batch_textures.create(32); - state.canvas_shader.bind(); + // just reserve some space (may not be needed as we are orphaning, but hey ho) + glGenBuffers(1, &bdata.gl_vertex_buffer); - state.lens_shader.init(); + if (bdata.vertex_buffer_size_bytes) { + glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, bdata.vertex_buffer_size_bytes, NULL, GL_DYNAMIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP, GLOBAL_DEF("rendering/quality/2d/use_pixel_snap", false)); + // pre fill index buffer, the indices never need to change so can be static + glGenBuffers(1, &bdata.gl_index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, bdata.gl_index_buffer); + + Vector indices; + indices.resize(bdata.index_buffer_size_units); + + for (int q = 0; q < max_quads; q++) { + int i_pos = q * 6; // 6 inds per quad + int q_pos = q * 4; // 4 verts per quad + indices.set(i_pos, q_pos); + indices.set(i_pos + 1, q_pos + 1); + indices.set(i_pos + 2, q_pos + 2); + indices.set(i_pos + 3, q_pos); + indices.set(i_pos + 4, q_pos + 2); + indices.set(i_pos + 5, q_pos + 3); + + // we can only use 16 bit indices in GLES2! 
+#ifdef DEBUG_ENABLED + CRASH_COND((q_pos + 3) > 65535); +#endif + } - state.using_light = NULL; - state.using_transparent_rt = false; - state.using_skeleton = false; -} + glBufferData(GL_ELEMENT_ARRAY_BUFFER, bdata.index_buffer_size_bytes, &indices[0], GL_STATIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -void RasterizerCanvasGLES2::finalize() { + } // only if there is a vertex buffer (batching is on) } RasterizerCanvasGLES2::RasterizerCanvasGLES2() { -#ifdef GLES_OVER_GL - use_nvidia_rect_workaround = GLOBAL_GET("rendering/quality/2d/gles2_use_nvidia_rect_flicker_workaround"); -#else - // Not needed (a priori) on GLES devices - use_nvidia_rect_workaround = false; -#endif + + bdata.settings_use_batching = false; } diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index f6ae6a60c05b..73ec5a8281e0 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -31,118 +31,254 @@ #ifndef RASTERIZERCANVASGLES2_H #define RASTERIZERCANVASGLES2_H -#include "rasterizer_storage_gles2.h" -#include "servers/visual/rasterizer.h" - -#include "shaders/canvas.glsl.gen.h" -#include "shaders/lens_distorted.glsl.gen.h" - -#include "shaders/canvas_shadow.glsl.gen.h" +#include "rasterizer_canvas_base_gles2.h" class RasterizerSceneGLES2; -class RasterizerCanvasGLES2 : public RasterizerCanvas { -public: - enum { - INSTANCE_ATTRIB_BASE = 8, - }; - - struct Uniforms { - Transform projection_matrix; +class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { - Transform2D modelview_matrix; - Transform2D extra_matrix; - - Color final_modulate; - - float time; + // used to determine whether we use hardware transform (none) + // software transform all verts, or software transform just a translate + // (no rotate or scale) + enum TransformMode { + TM_NONE, + TM_ALL, + TM_TRANSLATE, }; - struct Data { - - GLuint canvas_quad_vertices; - GLuint polygon_buffer; - GLuint polygon_index_buffer; - - 
uint32_t polygon_buffer_size; - uint32_t polygon_index_buffer_size; - - GLuint ninepatch_vertices; - GLuint ninepatch_elements; - - } data; - - struct State { - Uniforms uniforms; - bool canvas_texscreen_used; - CanvasShaderGLES2 canvas_shader; - CanvasShadowShaderGLES2 canvas_shadow_shader; - LensDistortedShaderGLES2 lens_shader; - - bool using_texture_rect; - bool using_ninepatch; - bool using_skeleton; - - Transform2D skeleton_transform; - Transform2D skeleton_transform_inverse; - Size2i skeleton_texture_size; + // pod versions of vector and color and RID, need to be 32 bit for vertex format + struct BatchVector2 { + float x, y; + void set(const Vector2 &p_o) { + x = p_o.x; + y = p_o.y; + } + void to(Vector2 &r_o) const { + r_o.x = x; + r_o.y = y; + } + }; - RID current_tex; - RID current_normal; - RasterizerStorageGLES2::Texture *current_tex_ptr; + struct BatchColor { + float r, g, b, a; + void set(const Color &p_c) { + r = p_c.r; + g = p_c.g; + b = p_c.b; + a = p_c.a; + } + bool equals(const Color &p_c) const { + return (r == p_c.r) && (g == p_c.g) && (b == p_c.b) && (a == p_c.a); + } + const float *get_data() const { return &r; } + }; - Transform vp; - Light *using_light; - bool using_shadow; - bool using_transparent_rt; + struct BatchVertex { + // must be 32 bit pod + BatchVector2 pos; + BatchVector2 uv; + }; - } state; + struct BatchVertexColored : public BatchVertex { + // must be 32 bit pod + BatchColor col; + }; - typedef void Texture; + struct Batch { + enum CommandType : uint32_t { + BT_DEFAULT, + BT_RECT, + }; + + CommandType type; + uint32_t first_command; // also item reference number + uint32_t num_commands; + uint32_t first_quad; + uint32_t batch_texture_id; + BatchColor color; + }; - RasterizerSceneGLES2 *scene_render; + struct BatchTex { + enum TileMode : uint32_t { + TILE_OFF, + TILE_NORMAL, + TILE_FORCE_REPEAT, + }; + RID RID_texture; + RID RID_normal; + TileMode tile_mode; + BatchVector2 tex_pixel_size; + }; - RasterizerStorageGLES2 *storage; 
+ // batch item may represent 1 or more items + struct BItemJoined { + uint32_t first_item_ref; + uint32_t num_item_refs; - bool use_nvidia_rect_workaround; + Rect2 bounding_rect; - virtual RID light_internal_create(); - virtual void light_internal_update(RID p_rid, Light *p_light); - virtual void light_internal_free(RID p_rid); + // we are always splitting items with lots of commands, + // and items with unhandled primitives (default) + bool use_hardware_transform() const { return num_item_refs == 1; } + }; - void _set_uniforms(); + struct BItemRef { + Item *item; + }; - virtual void canvas_begin(); - virtual void canvas_end(); + struct BatchData { + BatchData(); + void reset_flush() { + batches.reset(); + batch_textures.reset(); + vertices.reset(); + + total_quads = 0; + total_color_changes = 0; + } + + GLuint gl_vertex_buffer; + GLuint gl_index_buffer; + + uint32_t max_quads; + uint32_t vertex_buffer_size_units; + uint32_t vertex_buffer_size_bytes; + uint32_t index_buffer_size_units; + uint32_t index_buffer_size_bytes; + + RasterizerArrayGLES2 vertices; + RasterizerArrayGLES2 vertices_colored; + RasterizerArrayGLES2 batches; + RasterizerArrayGLES2 batches_temp; // used for translating to colored vertex batches + RasterizerArray_non_pod_GLES2 batch_textures; // the only reason this is non-POD is because of RIDs + + bool use_colored_vertices; + + RasterizerArrayGLES2 items_joined; + RasterizerArrayGLES2 item_refs; + + // counts + int total_quads; + + // we keep a record of how many color changes caused new batches + // if the colors are causing an excessive number of batches, we switch + // to alternate batching method and add color to the vertex format. 
+ int total_color_changes; + + // measured in pixels, recalculated each frame + float scissor_threshold_area; + + // global settings + bool settings_use_batching; // the current use_batching (affected by flash) + bool settings_use_batching_original_choice; // the choice entered in project settings + bool settings_flash_batching; // for regression testing, flash between non-batched and batched renderer + int settings_max_join_item_commands; + float settings_colored_vertex_format_threshold; + int settings_batch_buffer_num_verts; + bool settings_scissor_lights; + float settings_scissor_threshold; // 0.0 to 1.0 + } bdata; + + struct RenderItemState { + RenderItemState(); + Item *current_clip; + RasterizerStorageGLES2::Shader *shader_cache; + bool rebind_shader; + bool prev_use_skeleton; + int last_blend_mode; + RID canvas_last_material; + Color final_modulate; - _FORCE_INLINE_ void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs); - _FORCE_INLINE_ void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL); - _FORCE_INLINE_ void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); - _FORCE_INLINE_ void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); + // 'item group' is data over a single call to canvas_render_items + int item_group_z; + Color item_group_modulate; + Light *item_group_light; + Transform2D item_group_base_transform; + }; - _FORCE_INLINE_ void _canvas_item_render_commands(Item *p_item, Item *current_clip, bool &reclip, RasterizerStorageGLES2::Material *p_material); - void _copy_screen(const 
Rect2 &p_rect); - _FORCE_INLINE_ void _copy_texscreen(const Rect2 &p_rect); + struct FillState { + void reset() { + curr_batch = 0; + batch_tex_id = -1; + use_hardware_transform = true; + texpixel_size = Vector2(1, 1); + } + Batch *curr_batch; + int batch_tex_id; + bool use_hardware_transform; + Vector2 texpixel_size; + }; +public: virtual void canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); - virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow); - - virtual void canvas_light_shadow_buffer_update(RID p_buffer, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, CameraMatrix *p_xform_cache); - - virtual void reset_canvas(); - - RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map); - void _bind_quad_buffer(); - void draw_generic_textured_rect(const Rect2 &p_rect, const Rect2 &p_src); - void draw_lens_distortion_rect(const Rect2 &p_rect, float p_k1, float p_k2, const Vector2 &p_eye_center, float p_oversample); +private: + // legacy codepath .. 
to remove after testing + void _canvas_render_item(Item *p_ci, RenderItemState &r_ris); + _FORCE_INLINE_ void _canvas_item_render_commands(Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + + // high level batch funcs + void canvas_render_items_implementation(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + void render_joined_item(const BItemJoined &p_bij, RenderItemState &r_ris); + void join_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break); + void render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + bool prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + void flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + + // low level batch funcs + void _batch_translate_to_colored(); + _FORCE_INLINE_ int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match); + RasterizerStorageGLES2::Texture *_get_canvas_texture(const RID &p_texture) const; + void _batch_upload_buffers(); + void _batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material); + BatchVertex *_batch_vertex_request_new() { return bdata.vertices.request(); } + Batch *_batch_request_new(bool p_blank = true); + + bool _detect_batch_break(Item *p_ci); + void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const; + void _software_transform_vertex(Vector2 
&r_v, const Transform2D &p_tr) const; + TransformMode _find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const; + + // light scissoring + bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const; + bool _light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const; + void _calculate_scissor_threshold_area(); +public: void initialize(); - void finalize(); - - virtual void draw_window_margins(int *black_margin, RID *black_image); - RasterizerCanvasGLES2(); }; +////////////////////////////////////////////////////////////// + +_FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const { + Vector2 vc(r_v.x, r_v.y); + vc = p_tr.xform(vc); + r_v.set(vc); +} + +_FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const { + r_v = p_tr.xform(r_v); +} + +_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const { + if (!p_use_hardware_transform) { + r_tr = p_tr; + + // decided whether to do translate only for software transform + if ((p_tr.elements[0].x == 1.0) && + (p_tr.elements[0].y == 0.0) && + (p_tr.elements[1].x == 0.0) && + (p_tr.elements[1].y == 1.0)) { + return TM_TRANSLATE; + } else { + return TM_ALL; + } + } + + return TM_NONE; +} + #endif // RASTERIZERCANVASGLES2_H diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 19b9e2c783ce..6685cd7861b3 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2411,6 +2411,19 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/quality/depth_prepass/disable_for_vendors", "PowerVR,Mali,Adreno,Apple"); GLOBAL_DEF("rendering/quality/filters/use_nearest_mipmap_filter", false); + 
+ GLOBAL_DEF("rendering/gles2/batching/use_batching", true); + GLOBAL_DEF("rendering/gles2/batching/max_join_item_commands", 16); + GLOBAL_DEF("rendering/gles2/batching/colored_vertex_format_threshold", 0.25f); + GLOBAL_DEF("rendering/gles2/batching/light_scissor_area_threshold", 1.0f); + GLOBAL_DEF("rendering/gles2/batching/batch_buffer_size", 16384); + GLOBAL_DEF("rendering/gles2/debug/flash_batching", false); + GLOBAL_DEF_RST("rendering/gles2/debug/use_batching_in_editor", false); + + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/max_join_item_commands", PropertyInfo(Variant::INT, "rendering/gles2/batching/max_join_item_commands", PROPERTY_HINT_RANGE, "0,65535")); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/colored_vertex_format_threshold", PropertyInfo(Variant::REAL, "rendering/gles2/batching/colored_vertex_format_threshold", PROPERTY_HINT_RANGE, "0.0,1.0,0.01")); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/batch_buffer_size", PropertyInfo(Variant::INT, "rendering/gles2/batching/batch_buffer_size", PROPERTY_HINT_RANGE, "1024,65535,1024")); + ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/light_scissor_area_threshold", PropertyInfo(Variant::REAL, "rendering/gles2/batching/light_scissor_area_threshold", PROPERTY_HINT_RANGE, "0.0,1.0")); } VisualServer::~VisualServer() { From e7bec77ef36c2f5bc9da96d0adfe34f98223c9ed Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Mon, 6 Apr 2020 12:49:47 +0100 Subject: [PATCH 2/7] Bake final_modulate uniform into vertex colors Where the final_modulate color varies between render_items this can prevent batching. This PR solves this by baking final_modulate into the vertex colors, and setting the uniform 'final_modulate' to white, and allowing the joining of items that have different final_modulate values. 
The previous batching system can then cope with vertex color changes as normal. --- drivers/gles2/rasterizer_canvas_gles2.cpp | 52 ++++++++++++++++++----- drivers/gles2/rasterizer_canvas_gles2.h | 2 + 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index dc2885796c07..2698e49f5709 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -224,6 +224,12 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ Vector2 texpixel_size = r_fill_state.texpixel_size; + // checking the color for not being white makes it 92/90 times faster in the case where it is white + bool multiply_final_modulate = false; + if (!r_fill_state.use_hardware_transform && (r_fill_state.final_modulate != Color(1, 1, 1, 1))) { + multiply_final_modulate = true; + } + // start batch is a dummy batch (tex id -1) .. could be made more efficient if (!r_fill_state.curr_batch) { r_fill_state.curr_batch = _batch_request_new(); @@ -259,7 +265,10 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ Item::CommandRect *rect = static_cast(command); - const Color &col = rect->modulate; + Color col = rect->modulate; + if (multiply_final_modulate) { + col *= r_fill_state.final_modulate; + } // instead of doing all the texture preparation for EVERY rect, // we build a list of texture combinations and do this once off. 
@@ -1421,7 +1430,9 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij fill_state.use_hardware_transform = p_bij.use_hardware_transform(); for (unsigned int i = 0; i < p_bij.num_item_refs; i++) { - item = bdata.item_refs[p_bij.first_item_ref + i].item; + const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i]; + item = ref.item; + fill_state.final_modulate = ref.final_modulate; int command_count = item->commands.size(); int command_start = 0; @@ -1453,7 +1464,9 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur // only check whether to convert if there are quads (prevent divide by zero) if (bdata.total_quads) { - float ratio = (float)bdata.total_color_changes / (float)bdata.total_quads; + // minus 1 to prevent single primitives (ratio 1.0) always being converted to colored.. + // in that case it is slightly cheaper to just have the color as part of the batch + float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads; // use bigger than or equal so that 0.0 threshold can force always using colored verts if (ratio >= bdata.settings_colored_vertex_format_threshold) { @@ -1534,6 +1547,11 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color & // add the reference BItemRef *r = bdata.item_refs.request_with_grow(); r->item = ci; + // we are storing final_modulate in advance per item reference + // for baking into vertex colors. + // this may not be ideal... as we are increasing the size of item reference, + // but it is stupidly complex to calculate later, which would probably be slower. 
+ r->final_modulate = render_item_state.final_modulate; } else { CRASH_COND(j == 0); j->num_item_refs += 1; @@ -1541,6 +1559,7 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color & BItemRef *r = bdata.item_refs.request_with_grow(); r->item = ci; + r->final_modulate = render_item_state.final_modulate; } p_item_list = p_item_list->next; @@ -1628,10 +1647,12 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo // light_masked may possibly need state checking here. Check for regressions! - if (p_ci->final_modulate != r_ris.final_modulate) { - join = false; - r_ris.final_modulate = p_ci->final_modulate; - } + // we will now allow joining even if final modulate is different + // we will instead bake the final modulate into the vertex colors + // if (p_ci->final_modulate != r_ris.final_modulate) { + // join = false; + // r_ris.final_modulate = p_ci->final_modulate; + // } if (r_ris.current_clip != p_ci->final_clip_owner) { r_ris.current_clip = p_ci->final_clip_owner; @@ -1707,6 +1728,11 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo bool unshaded = r_ris.shader_cache && (r_ris.shader_cache->canvas_item.light_mode == RasterizerStorageGLES2::Shader::CanvasItem::LIGHT_MODE_UNSHADED || (blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_MIX && blend_mode != RasterizerStorageGLES2::Shader::CanvasItem::BLEND_MODE_PMALPHA)); bool reclip = false; + // we are precalculating the final_modulate ahead of time because we need this for baking of final modulate into vertex colors + // (only in software transform mode) + // This maybe inefficient storing it... + r_ris.final_modulate = unshaded ? 
p_ci->final_modulate : (p_ci->final_modulate * r_ris.item_group_modulate); + if (r_ris.last_blend_mode != blend_mode) { join = false; r_ris.last_blend_mode = blend_mode; @@ -2370,12 +2396,16 @@ void RasterizerCanvasGLES2::render_joined_item(const BItemJoined &p_bij, RenderI } } - state.uniforms.final_modulate = unshaded ? ci->final_modulate : Color(ci->final_modulate.r * r_ris.item_group_modulate.r, ci->final_modulate.g * r_ris.item_group_modulate.g, ci->final_modulate.b * r_ris.item_group_modulate.b, ci->final_modulate.a * r_ris.item_group_modulate.a); - - if (!p_bij.use_hardware_transform()) + // using software transform + if (!p_bij.use_hardware_transform()) { state.uniforms.modelview_matrix = Transform2D(); - else + // final_modulate will be baked per item ref and multiplied by a NULL final modulate in the shader + state.uniforms.final_modulate = Color(1, 1, 1, 1); + } else { state.uniforms.modelview_matrix = ci->final_transform; + // could use the stored version of final_modulate in item ref? Test which is faster NYI + state.uniforms.final_modulate = unshaded ? 
ci->final_modulate : (ci->final_modulate * r_ris.item_group_modulate); + } state.uniforms.extra_matrix = Transform2D(); _set_uniforms(); diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index 73ec5a8281e0..44cb1584ad4c 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -124,6 +124,7 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { struct BItemRef { Item *item; + Color final_modulate; }; struct BatchData { @@ -207,6 +208,7 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { int batch_tex_id; bool use_hardware_transform; Vector2 texpixel_size; + Color final_modulate; }; public: From 1fb6181ba63c1b258dbeff3ac0e577a664f03a26 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Sat, 11 Apr 2020 17:40:30 +0100 Subject: [PATCH 3/7] Revert to default Rect drawing code for single rects Determined that a large reason for the decrease in performance in unbatchable scenes was due to the new routine being analogous to the 'nvidia workaround' code, that is about half the speed. So this simply uses the old routine in the case of single unbatchable rects. Hopefully we will be able to remove the old path at a later stage. 
--- drivers/gles2/rasterizer_canvas_gles2.cpp | 48 +++++++++++++++++------ drivers/gles2/rasterizer_canvas_gles2.h | 14 +++++++ 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 2698e49f5709..17d22b6b0610 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -250,21 +250,45 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ switch (command->type) { default: { - if (r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { - // another default command, just add to the existing batch - r_fill_state.curr_batch->num_commands++; - } else { - // end of previous different type batch, so start new default batch - r_fill_state.curr_batch = _batch_request_new(); - r_fill_state.curr_batch->type = Batch::BT_DEFAULT; - r_fill_state.curr_batch->first_command = command_num; - r_fill_state.curr_batch->num_commands = 1; - } + _prefill_default_batch(r_fill_state, command_num); } break; case Item::Command::TYPE_RECT: { Item::CommandRect *rect = static_cast(command); + bool change_batch = false; + + // conditions for creating a new batch + if (r_fill_state.curr_batch->type != Batch::BT_RECT) { + change_batch = true; + + // check for special case if there is only a single or small number of rects, + // in which case we will use the legacy default rect renderer + // because it is faster for single rects + + // we only want to do this if not a joined item with more than 1 item, + // because joined items with more than 1, the command * will be incorrect + // NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!! + // If that assumption is incorrect this will go horribly wrong. 
+ if (r_fill_state.use_hardware_transform) { + bool is_single_rect = false; + int command_num_next = command_num + 1; + if (command_num_next < command_count) { + Item::Command *command_next = commands[command_num_next]; + if (command_next->type != Item::Command::TYPE_RECT) { + is_single_rect = true; + } + } else { + is_single_rect = true; + } + // if it is a rect on its own, do exactly the same as the default routine + if (is_single_rect) { + _prefill_default_batch(r_fill_state, command_num); + break; + } + } + } // if use hardware transform + Color col = rect->modulate; if (multiply_final_modulate) { col *= r_fill_state.final_modulate; @@ -290,10 +314,8 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ return true; } - bool change_batch = false; - // conditions for creating a new batch - if ((r_fill_state.curr_batch->type != Batch::BT_RECT) || (old_batch_tex_id != r_fill_state.batch_tex_id)) { + if (old_batch_tex_id != r_fill_state.batch_tex_id) { change_batch = true; } diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index 44cb1584ad4c..a1eab4fef41f 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -242,6 +242,7 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const; void _software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const; TransformMode _find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const; + _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num); // light scissoring bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const; @@ -255,6 +256,19 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { 
////////////////////////////////////////////////////////////// +_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num) { + if (r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { + // another default command, just add to the existing batch + r_fill_state.curr_batch->num_commands++; + } else { + // end of previous different type batch, so start new default batch + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = p_command_num; + r_fill_state.curr_batch->num_commands = 1; + } +} + _FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const { Vector2 vc(r_v.x, r_v.y); vc = p_tr.xform(vc); From 93af8e7d1bf8a4721c718163e74ef070d1dea208 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Sun, 12 Apr 2020 13:52:25 +0100 Subject: [PATCH 4/7] Batching across z_indices Extra functions canvas_render_items_begin and canvas_render_items_end are added to RasterizerCanvas, with noop stubs for non-GLES2 renderers. This enables batching to be spread over multiple z_indices, and multiple calls to canvas_render_items. It does this by only performing item joining within canvas_render_items, and deferring rendering until canvas_render_items_end(). 
--- drivers/gles2/rasterizer_canvas_gles2.cpp | 89 +++++++++++++++-------- drivers/gles2/rasterizer_canvas_gles2.h | 12 ++- servers/visual/rasterizer.h | 2 + servers/visual/visual_server_canvas.cpp | 4 + 4 files changed, 72 insertions(+), 35 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 17d22b6b0610..69d06251fff5 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -67,16 +67,18 @@ RasterizerCanvasGLES2::BatchData::BatchData() { settings_scissor_threshold = -1.0f; } -RasterizerCanvasGLES2::RenderItemState::RenderItemState() { - current_clip = NULL; - shader_cache = NULL; +void RasterizerCanvasGLES2::RenderItemState::reset() { + current_clip = nullptr; + shader_cache = nullptr; rebind_shader = true; prev_use_skeleton = false; last_blend_mode = -1; canvas_last_material = RID(); item_group_z = 0; - item_group_light = 0; + item_group_light = nullptr; final_modulate = Color(-1.0, -1.0, -1.0, -1.0); // just something unlikely + + joined_item = nullptr; } RasterizerStorageGLES2::Texture *RasterizerCanvasGLES2::_get_canvas_texture(const RID &p_texture) const { @@ -286,8 +288,8 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ _prefill_default_batch(r_fill_state, command_num); break; } - } - } // if use hardware transform + } // if use hardware transform + } Color col = rect->modulate; if (multiply_final_modulate) { @@ -1523,17 +1525,9 @@ void RasterizerCanvasGLES2::_canvas_item_render_commands(Item *p_item, Item *p_c render_batches(commands, p_current_clip, r_reclip, p_material); } -void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { - bdata.items_joined.reset(); - bdata.item_refs.reset(); - - RenderItemState render_item_state; - render_item_state.item_group_z = p_z; - render_item_state.item_group_modulate = p_modulate; - 
render_item_state.item_group_light = p_light; - render_item_state.item_group_base_transform = p_base_transform; +void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z) { - BItemJoined *j = 0; + _render_item_state.item_group_z = p_z; // join is whether to join to the previous batch. // batch_break is whether to PREVENT the next batch from joining with us @@ -1554,17 +1548,17 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color & // even though we know join is false. // also we need to run try_join_item for every item because it keeps the state up to date, // if we didn't run it the state would be out of date. - try_join_item(ci, render_item_state, batch_break); + try_join_item(ci, _render_item_state, batch_break); } else { - join = try_join_item(ci, render_item_state, batch_break); + join = try_join_item(ci, _render_item_state, batch_break); } // assume the first item will always return no join if (!join) { - j = bdata.items_joined.request_with_grow(); - j->first_item_ref = bdata.item_refs.size(); - j->num_item_refs = 1; - j->bounding_rect = ci->global_rect_cache; + _render_item_state.joined_item = bdata.items_joined.request_with_grow(); + _render_item_state.joined_item->first_item_ref = bdata.item_refs.size(); + _render_item_state.joined_item->num_item_refs = 1; + _render_item_state.joined_item->bounding_rect = ci->global_rect_cache; // add the reference BItemRef *r = bdata.item_refs.request_with_grow(); @@ -1573,23 +1567,22 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z, const Color & // for baking into vertex colors. // this may not be ideal... as we are increasing the size of item reference, // but it is stupidly complex to calculate later, which would probably be slower. 
- r->final_modulate = render_item_state.final_modulate; + r->final_modulate = _render_item_state.final_modulate; } else { - CRASH_COND(j == 0); - j->num_item_refs += 1; - j->bounding_rect = j->bounding_rect.merge(ci->global_rect_cache); + CRASH_COND(_render_item_state.joined_item == 0); + _render_item_state.joined_item->num_item_refs += 1; + _render_item_state.joined_item->bounding_rect = _render_item_state.joined_item->bounding_rect.merge(ci->global_rect_cache); BItemRef *r = bdata.item_refs.request_with_grow(); r->item = ci; - r->final_modulate = render_item_state.final_modulate; + r->final_modulate = _render_item_state.final_modulate; } p_item_list = p_item_list->next; } } -void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { - +void RasterizerCanvasGLES2::canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { // if we are debugging, flash each frame between batching renderer and old version to compare for regressions if (bdata.settings_flash_batching) { if ((Engine::get_singleton()->get_frames_drawn() % 2) == 0) @@ -1598,15 +1591,44 @@ void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, cons bdata.settings_use_batching = false; } + if (!bdata.settings_use_batching) { + return; + } + // this only needs to be done when screen size changes, but this should be // infrequent enough _calculate_scissor_threshold_area(); - // state 1 : join similar items, so that their state changes are not repeated, + // set up render item state for all the z_indexes (this is common to all z_indexes) + _render_item_state.reset(); + _render_item_state.item_group_modulate = p_modulate; + _render_item_state.item_group_light = p_light; + _render_item_state.item_group_base_transform = p_base_transform; +} + +void RasterizerCanvasGLES2::canvas_render_items_end() { + if (!bdata.settings_use_batching) { + return; + } 
+ + // batching render is deferred until after going through all the z_indices, joining all the items + canvas_render_items_implementation(0, 0, _render_item_state.item_group_modulate, + _render_item_state.item_group_light, + _render_item_state.item_group_base_transform); + + bdata.items_joined.reset(); + bdata.item_refs.reset(); +} + +void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { + // stage 1 : join similar items, so that their state changes are not repeated, // and commands from joined items can be batched together - if (bdata.settings_use_batching) - join_items(p_item_list, p_z, p_modulate, p_light, p_base_transform); + if (bdata.settings_use_batching) { + join_items(p_item_list, p_z); + return; + } + // only legacy renders at this stage, batched renderer doesn't render until canvas_render_items_end() canvas_render_items_implementation(p_item_list, p_z, p_modulate, p_light, p_base_transform); } @@ -1767,6 +1789,9 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo // a + light_blend + b + light_blend IS NOT THE SAME AS // a + b + light_blend join = false; + + // we also dont want to allow joining this item with the next item, because the next item could have no lights! 
+ r_batch_break = true; } if (reclip) { diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index a1eab4fef41f..cf8adba95e95 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -181,7 +181,8 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { } bdata; struct RenderItemState { - RenderItemState(); + RenderItemState() { reset(); } + void reset(); Item *current_clip; RasterizerStorageGLES2::Shader *shader_cache; bool rebind_shader; @@ -190,12 +191,15 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { RID canvas_last_material; Color final_modulate; + // used for joining items only + BItemJoined *joined_item; + // 'item group' is data over a single call to canvas_render_items int item_group_z; Color item_group_modulate; Light *item_group_light; Transform2D item_group_base_transform; - }; + } _render_item_state; struct FillState { void reset() { @@ -212,6 +216,8 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { }; public: + virtual void canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + virtual void canvas_render_items_end(); virtual void canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); private: @@ -222,7 +228,7 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { // high level batch funcs void canvas_render_items_implementation(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); void render_joined_item(const BItemJoined &p_bij, RenderItemState &r_ris); - void join_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + void join_items(Item *p_item_list, int p_z); bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break); void 
render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h index 0f528ee161de..82d85ad7238b 100644 --- a/servers/visual/rasterizer.h +++ b/servers/visual/rasterizer.h @@ -1066,6 +1066,8 @@ class RasterizerCanvas { virtual void canvas_begin() = 0; virtual void canvas_end() = 0; + virtual void canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) {} + virtual void canvas_render_items_end() {} virtual void canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) = 0; virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow) = 0; diff --git a/servers/visual/visual_server_canvas.cpp b/servers/visual/visual_server_canvas.cpp index f7c53884fc70..f13d18e9090e 100644 --- a/servers/visual/visual_server_canvas.cpp +++ b/servers/visual/visual_server_canvas.cpp @@ -42,11 +42,13 @@ void VisualServerCanvas::_render_canvas_item_tree(Item *p_canvas_item, const Tra _render_canvas_item(p_canvas_item, p_transform, p_clip_rect, Color(1, 1, 1, 1), 0, z_list, z_last_list, NULL, NULL); + VSG::canvas_render->canvas_render_items_begin(p_modulate, p_lights, p_transform); for (int i = 0; i < z_range; i++) { if (!z_list[i]) continue; VSG::canvas_render->canvas_render_items(z_list[i], VS::CANVAS_ITEM_Z_MIN + i, p_modulate, p_lights, p_transform); } + VSG::canvas_render->canvas_render_items_end(); } void _collect_ysort_children(VisualServerCanvas::Item *p_canvas_item, Transform2D p_transform, VisualServerCanvas::Item *p_material_owner, const Color p_modulate, VisualServerCanvas::Item **r_items, int &r_index) { @@ -259,6 +261,7 @@ void VisualServerCanvas::render_canvas(Canvas 
*p_canvas, const Transform2D &p_tr _render_canvas_item(ci[i].item, p_transform, p_clip_rect, Color(1, 1, 1, 1), 0, z_list, z_last_list, NULL, NULL); } + VSG::canvas_render->canvas_render_items_begin(p_canvas->modulate, p_lights, p_transform); for (int i = 0; i < z_range; i++) { if (!z_list[i]) continue; @@ -269,6 +272,7 @@ void VisualServerCanvas::render_canvas(Canvas *p_canvas, const Transform2D &p_tr VSG::canvas_render->canvas_render_items(z_list[i], VS::CANVAS_ITEM_Z_MIN + i, p_canvas->modulate, p_lights, p_transform); } + VSG::canvas_render->canvas_render_items_end(); } else { for (int i = 0; i < l; i++) { From a4cd274ca72f5a42d12cf5667ac8417be61d4d4c Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Wed, 15 Apr 2020 12:38:13 +0100 Subject: [PATCH 5/7] Batching with Extra Matrix commands Defers sending 'transform' commands within a RasterizerCanvas::Item until they are needed for default batches. Instead locally caches the extra matrix and applies it using software transform, preventing unnecessary batch breaks. The logic is relatively complex, and the whole 'extra matrix' of the legacy renderer in addition to the final_transform is not ideal. However this is required to accelerate some user drawing techniques, and later the lines in the IDE. 
--- drivers/gles2/rasterizer_canvas_gles2.cpp | 77 +++++++++++--- drivers/gles2/rasterizer_canvas_gles2.h | 118 ++++++++++++++++++---- 2 files changed, 159 insertions(+), 36 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 69d06251fff5..73957beb81a9 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -221,9 +221,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_count = p_item->commands.size(); Item::Command *const *commands = p_item->commands.ptr(); - Transform2D transform; - TransformMode transform_mode = _find_transform_mode(r_fill_state.use_hardware_transform, p_item->final_transform, transform); - + // just a local, might be more efficient in a register (check) Vector2 texpixel_size = r_fill_state.texpixel_size; // checking the color for not being white makes it 92/90 times faster in the case where it is white @@ -252,7 +250,36 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ switch (command->type) { default: { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); + } break; + case Item::Command::TYPE_TRANSFORM: { + // if the extra matrix has been sent already, + // break this extra matrix software path (as we don't want to unset it on the GPU etc) + if (r_fill_state.extra_matrix_sent) { + _prefill_default_batch(r_fill_state, command_num, *p_item); + } else { + // Extra matrix fast path. + // Instead of sending the command immediately, we store the modified transform (in combined) + // for software transform, and only flush this transform command if we NEED to (i.e. 
we want to + // render some default commands) + Item::CommandTransform *transform = static_cast(command); + const Transform2D &extra_matrix = transform->xform; + + if (r_fill_state.use_hardware_transform) { + // if we are using hardware transform mode, we have already sent the final transform, + // so we only want to software transform the extra matrix + r_fill_state.transform_combined = extra_matrix; + } else { + r_fill_state.transform_combined = p_item->final_transform * extra_matrix; + } + // after a transform command, always use some form of software transform (either the combined final + extra, or just the extra) + // until we flush this dirty extra matrix because we need to render default commands. + r_fill_state.transform_mode = _find_transform_mode(r_fill_state.transform_combined); + + // make a note of which command the dirty extra matrix is stored in, so we can send it later + // if necessary + r_fill_state.transform_extra_command_number_p1 = command_num + 1; // plus 1 so we can test against zero + } } break; case Item::Command::TYPE_RECT: { @@ -277,7 +304,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_num_next = command_num + 1; if (command_num_next < command_count) { Item::Command *command_next = commands[command_num_next]; - if (command_next->type != Item::Command::TYPE_RECT) { + if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) { is_single_rect = true; } } else { @@ -285,7 +312,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ } // if it is a rect on its own, do exactly the same as the default routine if (is_single_rect) { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); break; } } // if use hardware transform @@ -352,8 +379,8 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ // fill the quad geometry Vector2
mins = rect->rect.position; - if (transform_mode == TM_TRANSLATE) { - _software_transform_vertex(mins, transform); + if (r_fill_state.transform_mode == TM_TRANSLATE) { + _software_transform_vertex(mins, r_fill_state.transform_combined); } Vector2 maxs = mins + rect->rect.size; @@ -385,11 +412,11 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ SWAP(bB->pos, bC->pos); } - if (transform_mode == TM_ALL) { - _software_transform_vertex(bA->pos, transform); - _software_transform_vertex(bB->pos, transform); - _software_transform_vertex(bC->pos, transform); - _software_transform_vertex(bD->pos, transform); + if (r_fill_state.transform_mode == TM_ALL) { + _software_transform_vertex(bA->pos, r_fill_state.transform_combined); + _software_transform_vertex(bB->pos, r_fill_state.transform_combined); + _software_transform_vertex(bC->pos, r_fill_state.transform_combined); + _software_transform_vertex(bD->pos, r_fill_state.transform_combined); } // uvs @@ -1452,6 +1479,7 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij FillState fill_state; fill_state.reset(); fill_state.use_hardware_transform = p_bij.use_hardware_transform(); + fill_state.extra_matrix_sent = false; for (unsigned int i = 0; i < p_bij.num_item_refs; i++) { const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i]; @@ -1461,6 +1489,23 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij int command_count = item->commands.size(); int command_start = 0; + // ONCE OFF fill state setup, that will be retained over multiple calls to + // prefill_joined_item() + fill_state.transform_combined = item->final_transform; + + // decide the initial transform mode, and make a backup + // in orig_transform_mode in case we need to switch back + if (!fill_state.use_hardware_transform) { + fill_state.transform_mode = _find_transform_mode(fill_state.transform_combined); + } else { + fill_state.transform_mode = TM_NONE; + } + 
fill_state.orig_transform_mode = fill_state.transform_mode; + + // keep track of when we added an extra matrix + // so we can defer sending until we see a default command + fill_state.transform_extra_command_number_p1 = 0; + while (command_start < command_count) { // fill as many batches as possible (until all done, or the vertex buffer is full) bool bFull = prefill_joined_item(fill_state, command_start, item, p_current_clip, r_reclip, p_material); @@ -1469,7 +1514,6 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij // always pass first item (commands for default are always first item) flush_render_batches(first_item, p_current_clip, r_reclip, p_material); fill_state.reset(); - fill_state.use_hardware_transform = p_bij.use_hardware_transform(); } } } @@ -1799,7 +1843,7 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo } // non rects will break the batching anyway, we don't want to record item changes, detect this - if (_detect_batch_break(p_ci)) { + if (!r_batch_break && _detect_batch_break(p_ci)) { join = false; r_batch_break = true; } @@ -1847,7 +1891,8 @@ bool RasterizerCanvasGLES2::_detect_batch_break(Item *p_ci) { default: { return true; } break; - case Item::Command::TYPE_RECT: { + case Item::Command::TYPE_RECT: + case Item::Command::TYPE_TRANSFORM: { } break; } // switch diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index cf8adba95e95..4de3a197c279 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -203,9 +203,10 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { struct FillState { void reset() { + // don't reset members that need to be preserved after flushing + // half way through a list of commands curr_batch = 0; batch_tex_id = -1; - use_hardware_transform = true; texpixel_size = Vector2(1, 1); } Batch *curr_batch; @@ -213,6 +214,13 @@ class RasterizerCanvasGLES2 : public 
RasterizerCanvasBaseGLES2 { bool use_hardware_transform; Vector2 texpixel_size; Color final_modulate; + TransformMode transform_mode; + TransformMode orig_transform_mode; + + // support for extra matrices + bool extra_matrix_sent; // whether sent on this item (in which case software transform can't be used until end of item) + int transform_extra_command_number_p1; // plus one to allow fast checking against zero + Transform2D transform_combined; // final * extra }; public: @@ -247,8 +255,8 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { bool _detect_batch_break(Item *p_ci); void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const; void _software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const; - TransformMode _find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const; - _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num); + TransformMode _find_transform_mode(const Transform2D &p_tr) const; + _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item); // light scissoring bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const; @@ -262,12 +270,88 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { ////////////////////////////////////////////////////////////// -_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num) { +// Default batches will not occur in software transform only items +// EXCEPT IN THE CASE OF SINGLE RECTS (and this may well not occur, check the logic in prefill_joined_item TYPE_RECT) +// but can occur where transform commands have been sent during hardware batch +_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item) { if
(r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { - // another default command, just add to the existing batch - r_fill_state.curr_batch->num_commands++; + // don't need to flush an extra transform command? + if (!r_fill_state.transform_extra_command_number_p1) { + // another default command, just add to the existing batch + r_fill_state.curr_batch->num_commands++; + } else { +#ifdef DEBUG_ENABLED + if (r_fill_state.transform_extra_command_number_p1 != p_command_num) { + WARN_PRINT_ONCE("_prefill_default_batch : transform_extra_command_number_p1 != p_command_num"); + } +#endif + // we do have a pending extra transform command to flush + // either the extra transform is in the prior command, or not, in which case we need 2 batches + // if (r_fill_state.transform_extra_command_number_p1 == p_command_num) { + // this should be most common case + r_fill_state.curr_batch->num_commands += 2; + // } else { + // // mad ordering .. does this even happen? + // int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + + // // send the extra to the GPU in a batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = extra_command; + // r_fill_state.curr_batch->num_commands = 1; + + // // start default batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = p_command_num; + // r_fill_state.curr_batch->num_commands = 1; + // } + + r_fill_state.transform_extra_command_number_p1 = 0; // mark as sent + r_fill_state.extra_matrix_sent = true; + + // the original mode should always be hardware transform .. + // test this assumption + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // do we need to restore anything else? 
+ } } else { // end of previous different type batch, so start new default batch + + // first consider whether there is a dirty extra matrix to send + if (r_fill_state.transform_extra_command_number_p1) { + // get which command the extra is in, and blank all the records as it no longer is stored CPU side + int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + r_fill_state.transform_extra_command_number_p1 = 0; + r_fill_state.extra_matrix_sent = true; + + // send the extra to the GPU in a batch + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = extra_command; + r_fill_state.curr_batch->num_commands = 1; + + // revert to the original transform mode + // e.g. go back to NONE if we were in hardware transform mode + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // reset the original transform if we are going back to software mode, + // because the extra is now done on the GPU... 
+ // (any subsequent extras are sent directly to the GPU, no deferring) + if (r_fill_state.orig_transform_mode != TM_NONE) { + r_fill_state.transform_combined = p_item.final_transform; + } + + // can possibly combine batch with the next one in some cases + // this is more efficient than having an extra batch especially for the extra + if ((extra_command + 1) == p_command_num) { + r_fill_state.curr_batch->num_commands = 2; + return; + } + } + + // start default batch r_fill_state.curr_batch = _batch_request_new(); r_fill_state.curr_batch->type = Batch::BT_DEFAULT; r_fill_state.curr_batch->first_command = p_command_num; @@ -285,22 +369,16 @@ _FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(Vector2 &r r_v = p_tr.xform(r_v); } -_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const { - if (!p_use_hardware_transform) { - r_tr = p_tr; - - // decided whether to do translate only for software transform - if ((p_tr.elements[0].x == 1.0) && - (p_tr.elements[0].y == 0.0) && - (p_tr.elements[1].x == 0.0) && - (p_tr.elements[1].y == 1.0)) { - return TM_TRANSLATE; - } else { - return TM_ALL; - } +_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(const Transform2D &p_tr) const { + // decided whether to do translate only for software transform + if ((p_tr.elements[0].x == 1.0) && + (p_tr.elements[0].y == 0.0) && + (p_tr.elements[1].x == 0.0) && + (p_tr.elements[1].y == 1.0)) { + return TM_TRANSLATE; } - return TM_NONE; + return TM_ALL; } #endif // RASTERIZERCANVASGLES2_H From b6d652367b0eca0b26932156d48120967f9952f9 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Thu, 16 Apr 2020 11:46:52 +0100 Subject: [PATCH 6/7] Items and draw calls added to IDE Monitor in '2d' section This adds 2 new values (items and draw calls) to the performance monitor in a '2d' section, rather than reusing the 3d values 
in the 'raster' section. This makes it far easier to optimize games to minimize drawcalls. --- drivers/gles2/rasterizer_canvas_gles2.cpp | 15 +++++++++++++++ drivers/gles2/rasterizer_storage_gles2.cpp | 12 ++++++++++++ drivers/gles2/rasterizer_storage_gles2.h | 4 ++++ main/performance.cpp | 8 ++++++++ main/performance.h | 2 ++ scene/main/viewport.cpp | 2 ++ scene/main/viewport.h | 2 ++ servers/visual/visual_server_viewport.cpp | 2 ++ servers/visual_server.cpp | 6 +++++- servers/visual_server.h | 4 ++++ 10 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 73957beb81a9..62d7cfab32c7 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -620,6 +620,8 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer int num_elements = p_batch.num_commands * 6; glDrawElements(GL_TRIANGLES, num_elements, GL_UNSIGNED_SHORT, (void *)offset); + storage->info.render._2d_draw_call_count++; + switch (tex.tile_mode) { case BatchTex::TILE_FORCE_REPEAT: { state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, false); @@ -718,6 +720,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } #endif } + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_RECT: { @@ -911,6 +914,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_FORCE_REPEAT, false); + storage->info.render._2d_draw_call_count++; } break; @@ -1086,6 +1090,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + storage->info.render._2d_draw_call_count++; } break; @@ -1117,6 +1122,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite 
_bind_canvas_texture(RID(), RID()); _draw_polygon(indices, num_points * 3, num_points + 1, points, NULL, &circle->color, true); + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_POLYGON: { @@ -1149,6 +1155,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite glDisable(GL_LINE_SMOOTH); } #endif + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_MESH: { @@ -1212,6 +1219,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } } + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_MULTIMESH: { Item::CommandMultiMesh *mmesh = static_cast(command); @@ -1335,6 +1343,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false); + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_POLYLINE: { Item::CommandPolyLine *pline = static_cast(command); @@ -1386,6 +1395,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite glDisable(GL_LINE_SMOOTH); #endif } + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_PRIMITIVE: { @@ -1415,6 +1425,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } _draw_gui_primitive(primitive->points.size(), primitive->points.ptr(), primitive->colors.ptr(), primitive->uvs.ptr()); + storage->info.render._2d_draw_call_count++; } break; case Item::Command::TYPE_TRANSFORM: { @@ -1906,6 +1917,8 @@ bool RasterizerCanvasGLES2::_detect_batch_break(Item *p_ci) { // Legacy non-batched implementation for regression testing. // Should be removed after testing phase to avoid duplicate codepaths. 
void RasterizerCanvasGLES2::_canvas_render_item(Item *p_ci, RenderItemState &r_ris) { + storage->info.render._2d_item_count++; + if (r_ris.current_clip != p_ci->final_clip_owner) { r_ris.current_clip = p_ci->final_clip_owner; @@ -2269,6 +2282,8 @@ void RasterizerCanvasGLES2::_canvas_render_item(Item *p_ci, RenderItemState &r_r void RasterizerCanvasGLES2::render_joined_item(const BItemJoined &p_bij, RenderItemState &r_ris) { + storage->info.render._2d_item_count++; + // all the joined items will share the same state with the first item Item *ci = bdata.item_refs[p_bij.first_item_ref].item; diff --git a/drivers/gles2/rasterizer_storage_gles2.cpp b/drivers/gles2/rasterizer_storage_gles2.cpp index 7440c41fbd82..bca3528fa98d 100644 --- a/drivers/gles2/rasterizer_storage_gles2.cpp +++ b/drivers/gles2/rasterizer_storage_gles2.cpp @@ -5760,6 +5760,8 @@ void RasterizerStorageGLES2::render_info_end_capture() { info.snap.surface_switch_count = info.render.surface_switch_count - info.snap.surface_switch_count; info.snap.shader_rebind_count = info.render.shader_rebind_count - info.snap.shader_rebind_count; info.snap.vertices_count = info.render.vertices_count - info.snap.vertices_count; + info.snap._2d_item_count = info.render._2d_item_count - info.snap._2d_item_count; + info.snap._2d_draw_call_count = info.render._2d_draw_call_count - info.snap._2d_draw_call_count; } int RasterizerStorageGLES2::get_captured_render_info(VS::RenderInfo p_info) { @@ -5783,6 +5785,12 @@ int RasterizerStorageGLES2::get_captured_render_info(VS::RenderInfo p_info) { case VS::INFO_DRAW_CALLS_IN_FRAME: { return info.snap.draw_call_count; } break; + case VS::INFO_2D_ITEMS_IN_FRAME: { + return info.snap._2d_item_count; + } break; + case VS::INFO_2D_DRAW_CALLS_IN_FRAME: { + return info.snap._2d_draw_call_count; + } break; default: { return get_render_info(p_info); } @@ -5803,6 +5811,10 @@ int RasterizerStorageGLES2::get_render_info(VS::RenderInfo p_info) { return info.render_final.surface_switch_count; 
case VS::INFO_DRAW_CALLS_IN_FRAME: return info.render_final.draw_call_count; + case VS::INFO_2D_ITEMS_IN_FRAME: + return info.render_final._2d_item_count; + case VS::INFO_2D_DRAW_CALLS_IN_FRAME: + return info.render_final._2d_draw_call_count; case VS::INFO_USAGE_VIDEO_MEM_TOTAL: return 0; //no idea case VS::INFO_VIDEO_MEM_USED: diff --git a/drivers/gles2/rasterizer_storage_gles2.h b/drivers/gles2/rasterizer_storage_gles2.h index eed877c4750a..c33f6952240b 100644 --- a/drivers/gles2/rasterizer_storage_gles2.h +++ b/drivers/gles2/rasterizer_storage_gles2.h @@ -149,6 +149,8 @@ class RasterizerStorageGLES2 : public RasterizerStorage { uint32_t surface_switch_count; uint32_t shader_rebind_count; uint32_t vertices_count; + uint32_t _2d_item_count; + uint32_t _2d_draw_call_count; void reset() { object_count = 0; @@ -157,6 +159,8 @@ class RasterizerStorageGLES2 : public RasterizerStorage { surface_switch_count = 0; shader_rebind_count = 0; vertices_count = 0; + _2d_item_count = 0; + _2d_draw_call_count = 0; } } render, render_final, snap; diff --git a/main/performance.cpp b/main/performance.cpp index d829c6dfdca6..e38d7b4b4b7a 100644 --- a/main/performance.cpp +++ b/main/performance.cpp @@ -63,6 +63,8 @@ void Performance::_bind_methods() { BIND_ENUM_CONSTANT(RENDER_SHADER_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_SURFACE_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_DRAW_CALLS_IN_FRAME); + BIND_ENUM_CONSTANT(RENDER_2D_ITEMS_IN_FRAME); + BIND_ENUM_CONSTANT(RENDER_2D_DRAW_CALLS_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_VIDEO_MEM_USED); BIND_ENUM_CONSTANT(RENDER_TEXTURE_MEM_USED); BIND_ENUM_CONSTANT(RENDER_VERTEX_MEM_USED); @@ -109,6 +111,8 @@ String Performance::get_monitor_name(Monitor p_monitor) const { "raster/shader_changes", "raster/surface_changes", "raster/draw_calls", + "2d/items", + "2d/draw_calls", "video/video_mem", "video/texture_mem", "video/vertex_mem", @@ -147,6 +151,8 @@ float Performance::get_monitor(Monitor p_monitor) const { case 
RENDER_SHADER_CHANGES_IN_FRAME: return VS::get_singleton()->get_render_info(VS::INFO_SHADER_CHANGES_IN_FRAME); case RENDER_SURFACE_CHANGES_IN_FRAME: return VS::get_singleton()->get_render_info(VS::INFO_SURFACE_CHANGES_IN_FRAME); case RENDER_DRAW_CALLS_IN_FRAME: return VS::get_singleton()->get_render_info(VS::INFO_DRAW_CALLS_IN_FRAME); + case RENDER_2D_ITEMS_IN_FRAME: return VS::get_singleton()->get_render_info(VS::INFO_2D_ITEMS_IN_FRAME); + case RENDER_2D_DRAW_CALLS_IN_FRAME: return VS::get_singleton()->get_render_info(VS::INFO_2D_DRAW_CALLS_IN_FRAME); case RENDER_VIDEO_MEM_USED: return VS::get_singleton()->get_render_info(VS::INFO_VIDEO_MEM_USED); case RENDER_TEXTURE_MEM_USED: return VS::get_singleton()->get_render_info(VS::INFO_TEXTURE_MEM_USED); case RENDER_VERTEX_MEM_USED: return VS::get_singleton()->get_render_info(VS::INFO_VERTEX_MEM_USED); @@ -189,6 +195,8 @@ Performance::MonitorType Performance::get_monitor_type(Monitor p_monitor) const MONITOR_TYPE_QUANTITY, MONITOR_TYPE_QUANTITY, MONITOR_TYPE_QUANTITY, + MONITOR_TYPE_QUANTITY, + MONITOR_TYPE_QUANTITY, MONITOR_TYPE_MEMORY, MONITOR_TYPE_MEMORY, MONITOR_TYPE_MEMORY, diff --git a/main/performance.h b/main/performance.h index 638ddbe9931e..cf1784d6ded6 100644 --- a/main/performance.h +++ b/main/performance.h @@ -69,6 +69,8 @@ class Performance : public Object { RENDER_SHADER_CHANGES_IN_FRAME, RENDER_SURFACE_CHANGES_IN_FRAME, RENDER_DRAW_CALLS_IN_FRAME, + RENDER_2D_ITEMS_IN_FRAME, + RENDER_2D_DRAW_CALLS_IN_FRAME, RENDER_VIDEO_MEM_USED, RENDER_TEXTURE_MEM_USED, RENDER_VERTEX_MEM_USED, diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index 3474ca6626b1..e4f1e976159d 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -3287,6 +3287,8 @@ void Viewport::_bind_methods() { BIND_ENUM_CONSTANT(RENDER_INFO_SHADER_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_INFO_SURFACE_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_INFO_DRAW_CALLS_IN_FRAME); + 
BIND_ENUM_CONSTANT(RENDER_INFO_2D_ITEMS_IN_FRAME); + BIND_ENUM_CONSTANT(RENDER_INFO_2D_DRAW_CALLS_IN_FRAME); BIND_ENUM_CONSTANT(RENDER_INFO_MAX); BIND_ENUM_CONSTANT(DEBUG_DRAW_DISABLED); diff --git a/scene/main/viewport.h b/scene/main/viewport.h index 79b606cda3b1..da329fdf4365 100644 --- a/scene/main/viewport.h +++ b/scene/main/viewport.h @@ -133,6 +133,8 @@ class Viewport : public Node { RENDER_INFO_SHADER_CHANGES_IN_FRAME, RENDER_INFO_SURFACE_CHANGES_IN_FRAME, RENDER_INFO_DRAW_CALLS_IN_FRAME, + RENDER_INFO_2D_ITEMS_IN_FRAME, + RENDER_INFO_2D_DRAW_CALLS_IN_FRAME, RENDER_INFO_MAX }; diff --git a/servers/visual/visual_server_viewport.cpp b/servers/visual/visual_server_viewport.cpp index df9cef20f9ac..1b909743a7f6 100644 --- a/servers/visual/visual_server_viewport.cpp +++ b/servers/visual/visual_server_viewport.cpp @@ -349,6 +349,8 @@ void VisualServerViewport::draw_viewports() { vp->render_info[VS::VIEWPORT_RENDER_INFO_SHADER_CHANGES_IN_FRAME] = VSG::storage->get_captured_render_info(VS::INFO_SHADER_CHANGES_IN_FRAME); vp->render_info[VS::VIEWPORT_RENDER_INFO_SURFACE_CHANGES_IN_FRAME] = VSG::storage->get_captured_render_info(VS::INFO_SURFACE_CHANGES_IN_FRAME); vp->render_info[VS::VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME] = VSG::storage->get_captured_render_info(VS::INFO_DRAW_CALLS_IN_FRAME); + vp->render_info[VS::VIEWPORT_RENDER_INFO_2D_ITEMS_IN_FRAME] = VSG::storage->get_captured_render_info(VS::INFO_2D_ITEMS_IN_FRAME); + vp->render_info[VS::VIEWPORT_RENDER_INFO_2D_DRAW_CALLS_IN_FRAME] = VSG::storage->get_captured_render_info(VS::INFO_2D_DRAW_CALLS_IN_FRAME); if (vp->viewport_to_screen_rect != Rect2() && (!vp->viewport_render_direct_to_screen || !VSG::rasterizer->is_low_end())) { //copy to screen if set as such diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 6685cd7861b3..c749ebc0fb24 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2188,6 +2188,8 @@ void VisualServer::_bind_methods() { 
BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_SHADER_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_SURFACE_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME); + BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_2D_ITEMS_IN_FRAME); + BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_2D_DRAW_CALLS_IN_FRAME); BIND_ENUM_CONSTANT(VIEWPORT_RENDER_INFO_MAX); BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_DISABLED); @@ -2247,6 +2249,8 @@ void VisualServer::_bind_methods() { BIND_ENUM_CONSTANT(INFO_SHADER_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(INFO_SURFACE_CHANGES_IN_FRAME); BIND_ENUM_CONSTANT(INFO_DRAW_CALLS_IN_FRAME); + BIND_ENUM_CONSTANT(INFO_2D_ITEMS_IN_FRAME); + BIND_ENUM_CONSTANT(INFO_2D_DRAW_CALLS_IN_FRAME); BIND_ENUM_CONSTANT(INFO_USAGE_VIDEO_MEM_TOTAL); BIND_ENUM_CONSTANT(INFO_VIDEO_MEM_USED); BIND_ENUM_CONSTANT(INFO_TEXTURE_MEM_USED); @@ -2418,7 +2422,7 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/gles2/batching/light_scissor_area_threshold", 1.0f); GLOBAL_DEF("rendering/gles2/batching/batch_buffer_size", 16384); GLOBAL_DEF("rendering/gles2/debug/flash_batching", false); - GLOBAL_DEF_RST("rendering/gles2/debug/use_batching_in_editor", false); + GLOBAL_DEF_RST("rendering/gles2/debug/use_batching_in_editor", true); ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/max_join_item_commands", PropertyInfo(Variant::INT, "rendering/gles2/batching/max_join_item_commands", PROPERTY_HINT_RANGE, "0,65535")); ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/colored_vertex_format_threshold", PropertyInfo(Variant::REAL, "rendering/gles2/batching/colored_vertex_format_threshold", PROPERTY_HINT_RANGE, "0.0,1.0,0.01")); diff --git a/servers/visual_server.h b/servers/visual_server.h index 17f43fda74d6..39e65d23f687 100644 --- a/servers/visual_server.h +++ b/servers/visual_server.h @@ -689,6 +689,8 @@ class VisualServer : public Object { VIEWPORT_RENDER_INFO_SHADER_CHANGES_IN_FRAME, 
VIEWPORT_RENDER_INFO_SURFACE_CHANGES_IN_FRAME, VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME, + VIEWPORT_RENDER_INFO_2D_ITEMS_IN_FRAME, + VIEWPORT_RENDER_INFO_2D_DRAW_CALLS_IN_FRAME, VIEWPORT_RENDER_INFO_MAX }; @@ -1016,6 +1018,8 @@ class VisualServer : public Object { INFO_SHADER_CHANGES_IN_FRAME, INFO_SURFACE_CHANGES_IN_FRAME, INFO_DRAW_CALLS_IN_FRAME, + INFO_2D_ITEMS_IN_FRAME, + INFO_2D_DRAW_CALLS_IN_FRAME, INFO_USAGE_VIDEO_MEM_TOTAL, INFO_VIDEO_MEM_USED, INFO_TEXTURE_MEM_USED, From 72adefa5cf9d1633f81165ca9480f4a8849a658b Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Fri, 17 Apr 2020 08:44:12 +0100 Subject: [PATCH 7/7] Add frame diagnostics for GLES2 Batch renderer Added project setting to enable / disable print frame diagnostics every 10 seconds. This prints out a list of batches and info, which is useful to optimize games and identify performance problems. --- doc/classes/Performance.xml | 30 +++++---- doc/classes/ProjectSettings.xml | 5 +- doc/classes/Viewport.xml | 8 ++- doc/classes/VisualServer.xml | 22 ++++-- drivers/gles2/rasterizer_canvas_gles2.cpp | 82 ++++++++++++++++++++++- drivers/gles2/rasterizer_canvas_gles2.h | 11 +++ servers/visual_server.cpp | 1 + 7 files changed, 139 insertions(+), 20 deletions(-) diff --git a/doc/classes/Performance.xml b/doc/classes/Performance.xml index fbbbb5a99ba3..7596e1d29873 100644 --- a/doc/classes/Performance.xml +++ b/doc/classes/Performance.xml @@ -79,40 +79,46 @@ Draw calls per frame. 3D only. - + + Items or joined items drawn per frame. + + + Draw calls per frame. + + The amount of video memory used, i.e. texture and vertex memory combined. - + The amount of texture memory used. - + The amount of vertex memory used. - + Unimplemented in the GLES2 and GLES3 rendering backends, always returns 0. - + Number of active [RigidBody2D] nodes in the game. - + Number of collision pairs in the 2D physics engine. - + Number of islands in the 2D physics engine. - + Number of active [RigidBody] and [VehicleBody] nodes in the game. 
- + Number of collision pairs in the 3D physics engine. - + Number of islands in the 3D physics engine. - + Output latency of the [AudioServer]. - + Represents the size of the [enum Monitor] enum. diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 713103a29f9c..836c776ecb3c 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -984,10 +984,13 @@ Turns batching on and off. Batching increases performance by reducing the amount of graphics API drawcalls. + + When batching is on, this regularly prints a frame diagnosis log. Note that this will degrade performance. + [b]Experimental[/b] For regression testing against the old renderer. If this is switched on, and [code]use_batching[/code] is set, the renderer will swap alternately between using the old renderer, and the batched renderer, on each frame. This makes it easy to identify visual differences. Performance will be degraded. - + [b]Experimental[/b] Switches on batching within the editor. Use with caution - note that if your editor does not render correctly you may need to edit your [code]project.godot[/code] and remove the use_batching_in_editor setting manually. diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index c9afc9b1bf06..c55bc9b6a311 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -377,7 +377,13 @@ Amount of draw calls in frame. - + + Amount of items or joined items in frame. + + + Amount of draw calls in frame. + + Represents the size of the [enum RenderInfo] enum. diff --git a/doc/classes/VisualServer.xml b/doc/classes/VisualServer.xml index 7db734fc09de..f672222fc460 100644 --- a/doc/classes/VisualServer.xml +++ b/doc/classes/VisualServer.xml @@ -4601,7 +4601,13 @@ Number of draw calls during this frame. - + + Number of 2d items drawn this frame. + + + Number of 2d draw calls during this frame. + + Represents the size of the [enum ViewportRenderInfo] enum. 
@@ -4748,16 +4754,22 @@ The amount of draw calls in frame. - + + The amount of 2D items in the frame. + + + The amount of 2D draw calls in frame. + + Unimplemented in the GLES2 and GLES3 rendering backends, always returns 0. - + The amount of video memory used, i.e. texture and vertex memory combined. - + The amount of texture memory used. - + The amount of vertex memory used. diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 62d7cfab32c7..0493ff20f4cd 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -60,9 +60,13 @@ RasterizerCanvasGLES2::BatchData::BatchData() { settings_colored_vertex_format_threshold = 0.0f; settings_batch_buffer_num_verts = 0; scissor_threshold_area = 0.0f; + diagnose_frame = false; + next_diagnose_tick = 10000; + diagnose_frame_number = 9999999999; // some high number settings_use_batching_original_choice = false; settings_flash_batching = false; + settings_diagnose_frame = false; settings_scissor_lights = false; settings_scissor_threshold = -1.0f; } @@ -642,6 +646,32 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } +void RasterizerCanvasGLES2::diagnose_batches(Item::Command *const *p_commands) { + int num_batches = bdata.batches.size(); + + for (int batch_num = 0; batch_num < num_batches; batch_num++) { + const Batch &batch = bdata.batches[batch_num]; + bdata.frame_string += "\t\tbatch "; + + switch (batch.type) { + case Batch::BT_RECT: { + bdata.frame_string += "R "; + bdata.frame_string += itos(batch.num_commands); + bdata.frame_string += " [" + itos(batch.batch_texture_id) + "]"; + if (batch.num_commands > 1) { + bdata.frame_string += " MULTI\n"; + } else { + bdata.frame_string += "\n"; + } + } break; + default: { + bdata.frame_string += "D "; + bdata.frame_string += itos(batch.num_commands) + "\n"; + } break; + } + } +} + void
RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { int num_batches = bdata.batches.size(); @@ -1562,6 +1592,10 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur Item::Command *const *commands = p_first_item->commands.ptr(); + if (bdata.diagnose_frame) { + diagnose_batches(commands); + } + render_batches(commands, p_current_clip, r_reclip, p_material); } @@ -1637,6 +1671,33 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z) { } } +void RasterizerCanvasGLES2::canvas_begin() { + // diagnose_frame? + if (bdata.settings_diagnose_frame) { + bdata.diagnose_frame = false; + + uint32_t tick = OS::get_singleton()->get_ticks_msec(); + uint64_t frame = Engine::get_singleton()->get_frames_drawn(); + + if (tick >= bdata.next_diagnose_tick) { + bdata.next_diagnose_tick = tick + 10000; + + // the plus one is to prevent starting diagnosis half way through a frame + bdata.diagnose_frame_number = frame + 1; + } + + if (frame == bdata.diagnose_frame_number) { + bdata.diagnose_frame = true; + } + + if (bdata.diagnose_frame) { + bdata.frame_string = "canvas_begin FRAME " + itos(frame) + "\n"; + } + } + + RasterizerCanvasBaseGLES2::canvas_begin(); +} + void RasterizerCanvasGLES2::canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { // if we are debugging, flash each frame between batching renderer and old version to compare for regressions if (bdata.settings_flash_batching) { @@ -1666,6 +1727,10 @@ void RasterizerCanvasGLES2::canvas_render_items_end() { return; } + if (bdata.diagnose_frame) { + bdata.frame_string += "items\n"; + } + // batching render is deferred until after going through all the z_indices, joining all the items canvas_render_items_implementation(0, 0, _render_item_state.item_group_modulate, _render_item_state.item_group_light, @@ -1673,6 +1738,10 @@ void
RasterizerCanvasGLES2::canvas_render_items_end() { bdata.items_joined.reset(); bdata.item_refs.reset(); + + if (bdata.diagnose_frame) { + print_line(bdata.frame_string); + } } void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { @@ -2284,6 +2353,10 @@ void RasterizerCanvasGLES2::render_joined_item(const BItemJoined &p_bij, RenderI storage->info.render._2d_item_count++; + if (bdata.diagnose_frame) { + bdata.frame_string += "\tjoined_item " + itos(p_bij.num_item_refs) + " refs\n"; + } + // all the joined items will share the same state with the first item Item *ci = bdata.item_refs[p_bij.first_item_ref].item; @@ -2798,6 +2871,12 @@ void RasterizerCanvasGLES2::initialize() { bdata.settings_flash_batching = false; } + // frame diagnosis. print out the batches every nth frame + bdata.settings_diagnose_frame = false; + if (!Engine::get_singleton()->is_editor_hint() && bdata.settings_use_batching) { + bdata.settings_diagnose_frame = GLOBAL_GET("rendering/gles2/debug/diagnose_frame"); + } + // the maximum num quads in a batch is limited by GLES2. We can have only 16 bit indices, // which means we can address a vertex buffer of max size 65535. 4 vertices are needed per quad. 
@@ -2823,7 +2902,8 @@ void RasterizerCanvasGLES2::initialize() { batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n"; batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n"; batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n"; - batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)); + batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n"; + batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame)); } else { batching_options_string += "OFF"; } diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index 4de3a197c279..8669545a7a99 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -169,10 +169,17 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { // measured in pixels, recalculated each frame float scissor_threshold_area; + // diagnose this frame, every nTh frame when settings_diagnose_frame is on + bool diagnose_frame; + String frame_string; + uint32_t next_diagnose_tick; + uint64_t diagnose_frame_number; + // global settings bool settings_use_batching; // the current use_batching (affected by flash) bool settings_use_batching_original_choice; // the choice entered in project settings bool settings_flash_batching; // for regression testing, flash between non-batched and batched renderer + bool settings_diagnose_frame; // print out batches to help optimize / regression test int settings_max_join_item_commands; float settings_colored_vertex_format_threshold; int settings_batch_buffer_num_verts; @@ -227,6 +234,7 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { virtual void canvas_render_items_begin(const Color &p_modulate, 
Light *p_light, const Transform2D &p_base_transform); virtual void canvas_render_items_end(); virtual void canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + virtual void canvas_begin(); private: // legacy codepath .. to remove after testing @@ -263,6 +271,9 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { bool _light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const; void _calculate_scissor_threshold_area(); + // debug + void diagnose_batches(Item::Command *const *p_commands); + public: void initialize(); RasterizerCanvasGLES2(); diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index c749ebc0fb24..18fcdeee370a 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2422,6 +2422,7 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/gles2/batching/light_scissor_area_threshold", 1.0f); GLOBAL_DEF("rendering/gles2/batching/batch_buffer_size", 16384); GLOBAL_DEF("rendering/gles2/debug/flash_batching", false); + GLOBAL_DEF("rendering/gles2/debug/diagnose_frame", false); GLOBAL_DEF_RST("rendering/gles2/debug/use_batching_in_editor", true); ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/max_join_item_commands", PropertyInfo(Variant::INT, "rendering/gles2/batching/max_join_item_commands", PROPERTY_HINT_RANGE, "0,65535"));