feat: small perf improvements

This commit is contained in:
dusk 2024-08-26 16:57:09 +03:00
parent c34e2b72bf
commit b970959e65
Signed by: dusk
SSH Key Fingerprint: SHA256:Abmvag+juovVufZTxyWY8KcVgrznxvBjQpJesv071Aw
4 changed files with 36 additions and 33 deletions

View File

@ -8,6 +8,15 @@ const EPSILON: float = 0.00001
var SIMULATION_RATE: int = 1 var SIMULATION_RATE: int = 1
var flocks: Dictionary = {} var flocks: Dictionary = {}
var total_boid_count: int = 0:
set(new_count):
total_boid_count = new_count
args_array.resize(total_boid_count)
forces_array.resize(total_boid_count)
# create our arrays for parallel processing
var args_array: Array[Dictionary] = []
var forces_array: PackedVector3Array = []
func _ready() -> void: func _ready() -> void:
get_tree().node_added.connect(_register_flock) get_tree().node_added.connect(_register_flock)
@ -15,6 +24,9 @@ func _ready() -> void:
_init_register_flock() _init_register_flock()
args_array.resize(total_boid_count)
forces_array.resize(total_boid_count)
func _init_register_flock(node: Node = get_tree().root) -> void: func _init_register_flock(node: Node = get_tree().root) -> void:
_register_flock(node) _register_flock(node)
for child: Node in node.get_children(): for child: Node in node.get_children():
@ -36,40 +48,34 @@ func _physics_process(delta: float) -> void:
_process_boids() _process_boids()
func _process_boids() -> void: func _process_boids() -> void:
# organize the work into tasks var total_parallel_tasks := total_boid_count / PARALLELIZATION_RATE
if total_boid_count % PARALLELIZATION_RATE > 0: total_parallel_tasks += 1
var boid_count := 0 var boid_count := 0
var boids_array_idx := 0 # organize the work into tasks
var args_arrays: Array[Array] = [[]]
var force_arrays: Array[PackedVector3Array] = [PackedVector3Array([])]
for flock: Flock in flocks.values(): for flock: Flock in flocks.values():
var flock_args := _pack_calc_args_flock(flock) var flock_args := _pack_calc_args_flock(flock)
for boid in flock.boids.values(): for boid in flock.boids.values():
var args := _pack_calc_args_boid(boid, flock_args.duplicate()) var args := _pack_calc_args_boid(boid, flock_args.duplicate())
args_arrays[boids_array_idx].append(args) args_array[boid_count] = args
force_arrays[boids_array_idx].append(Vector3.ZERO) forces_array[boid_count] = Vector3.ZERO
boid_count += 1 boid_count += 1
if boid_count > PARALLELIZATION_RATE:
boid_count = 0
boids_array_idx += 1
args_arrays.append([])
force_arrays.append(PackedVector3Array([]))
# distribute tasks to threads # distribute tasks to threads
# TODO: calculate on main thread if there aren't enough boids to warrant doing this # TODO: calculate on main thread if there aren't enough boids to warrant doing this
var calc_task := WorkerThreadPool.add_group_task( var calc_task := WorkerThreadPool.add_group_task(
_calculate_boid_parallel.bind(args_arrays, force_arrays), _calculate_boid_parallel,
args_arrays.size(), total_parallel_tasks,
args_arrays.size(), total_parallel_tasks,
true, true,
) )
WorkerThreadPool.wait_for_group_task_completion(calc_task) WorkerThreadPool.wait_for_group_task_completion(calc_task)
# apply the forces # apply the forces
for idx in args_arrays.size(): var idx := 0
var args = args_arrays[idx] for force in forces_array:
var forces = force_arrays[idx] args_array[idx].boid.apply_force(force)
for iidx in args.size(): idx += 1
args[iidx].boid.apply_force(forces[iidx])
func _pack_calc_args_flock(flock: Flock) -> Dictionary: func _pack_calc_args_flock(flock: Flock) -> Dictionary:
var num_of_boids := flock.boids.size() var num_of_boids := flock.boids.size()
@ -98,13 +104,13 @@ func _pack_calc_args_boid(boid, args: Dictionary) -> Dictionary:
args['self_pos'] = boid._get_boid_position() args['self_pos'] = boid._get_boid_position()
return args return args
func _calculate_boid_parallel(idx: int, read_from: Array[Array], write_to: Array[PackedVector3Array]) -> void: func _calculate_boid_parallel(idx: int) -> void:
var args = read_from[idx] var start_from := PARALLELIZATION_RATE * idx
var forces = write_to[idx] var end_at := mini(start_from + PARALLELIZATION_RATE, total_boid_count)
var arg_idx := 0 var arg_idx := start_from
for arg in args: while arg_idx < end_at:
var force = _calculate_boid(arg) var force = _calculate_boid(args_array[arg_idx])
forces[arg_idx] = force forces_array[arg_idx] = force
arg_idx += 1 arg_idx += 1
func _calculate_boid(args: Dictionary) -> Vector3: func _calculate_boid(args: Dictionary) -> Vector3:
@ -130,7 +136,7 @@ func _calculate_boid(args: Dictionary) -> Vector3:
for aboid_pos in others_pos: for aboid_pos in others_pos:
# squared distance is faster for the checks; we can just sqrt later when calculating steering # squared distance is faster for the checks; we can just sqrt later when calculating steering
var dist = boid_pos.distance_squared_to(aboid_pos) var dist = boid_pos.distance_squared_to(aboid_pos)
if dist >= EPSILON: if dist > EPSILON:
if dist < goal_seperation: if dist < goal_seperation:
var diff = (boid_pos - aboid_pos).normalized() / sqrt(dist) var diff = (boid_pos - aboid_pos).normalized() / sqrt(dist)
steer += diff; steer_count += 1 steer += diff; steer_count += 1

View File

@ -26,9 +26,11 @@ func _register_boid(maybe_boid: Node) -> void:
if maybe_boid is not Boid2D and maybe_boid is not Boid3D: return if maybe_boid is not Boid2D and maybe_boid is not Boid3D: return
maybe_boid.flock = self maybe_boid.flock = self
boids[maybe_boid.get_instance_id()] = maybe_boid boids[maybe_boid.get_instance_id()] = maybe_boid
BoidManager.total_boid_count += 1
print_verbose("[", self, "]", " boid ", maybe_boid, " registered") print_verbose("[", self, "]", " boid ", maybe_boid, " registered")
func _unregister_boid(maybe_boid: Node) -> void: func _unregister_boid(maybe_boid: Node) -> void:
if maybe_boid is not Boid2D and maybe_boid is not Boid3D: return if maybe_boid is not Boid2D and maybe_boid is not Boid3D: return
boids.erase(maybe_boid.get_instance_id()) boids.erase(maybe_boid.get_instance_id())
BoidManager.total_boid_count -= 1
print_verbose("[", self, "]", " boid ", maybe_boid, " unregistered") print_verbose("[", self, "]", " boid ", maybe_boid, " unregistered")

View File

@ -2,7 +2,7 @@ extends Node2D
func _ready() -> void: func _ready() -> void:
for flock in get_children(): for flock in get_children():
for i in 100: spawnBoid(flock) for i in 1000: spawnBoid(flock)
func spawnBoid(flock: Flock) -> void: func spawnBoid(flock: Flock) -> void:
var boid: Boid2D = preload("../example_boid.tscn").instantiate() var boid: Boid2D = preload("../example_boid.tscn").instantiate()

View File

@ -26,11 +26,6 @@ settings/stdout/verbose_stdout=true
enabled=PackedStringArray("res://addons/boids/plugin.cfg") enabled=PackedStringArray("res://addons/boids/plugin.cfg")
[physics]
2d/run_on_separate_thread=true
3d/run_on_separate_thread=true
[rendering] [rendering]
renderer/rendering_method="gl_compatibility" renderer/rendering_method="gl_compatibility"