The part in red below might be problematic:
def find_duplicates(*geo):
    """
    Return a list of duplicate pairs for all the objects in the supplied list *geo.

    Works in two passes:

    1. Every object is keyed by ``bbox_hash``; an object whose key was already
       seen is recorded as a candidate pair with the first object that had
       that key.
    2. All objects appearing in any candidate pair are pooled together and
       keyed again by ``geo_hash``; an object whose key was already seen is
       reported as a pair with the first object that had that key.

    NOTE: the second pass compares every pooled candidate against every other
    one, so a reported pair is guaranteed to share a ``geo_hash`` but not
    necessarily a ``bbox_hash``. Only the first object seen for a key is
    remembered, so three or more identical objects are reported as pairs
    against that first object.
    """
    known = {}
    dupes = []
    for item in geo:
        key = bbox_hash(item)
        if key not in known:
            known[key] = item
        else:
            dupes.append((known[key], item))

    # Flatten the candidate pairs into the set of unique objects involved.
    possibles = set(itertools.chain.from_iterable(dupes))

    geo_known = {}
    dupes = []
    for item in possibles:
        key = geo_hash(item)
        if key not in geo_known:
            geo_known[key] = item
        else:
            dupes.append((geo_known[key], item))
    return dupes
If dupes were [ ( A, B ), ( C, D ) ], and geo_hash( B ) == geo_hash( C ), then the final result would be [ ( B, C ) ]. Even though bbox_hash( B ) != bbox_hash( C )!
It can probably be replaced by something like this instead:
dupes2 = []
for object1, object2 in dupes:
if geo_hash( object1 ) == geo_hash( object2 ):
dupes2.append( ( object1, object2 ) )
return dupes2
But then, what if there are more than 2 duplicate instances? Say, A == B == C?
More test iterations needed?
The following is some modified version of code above using SQLite just for illustration (untested!):
import maya.cmds as cmds
import itertools
import sqlite3
# Schema for the scratch table: one row per object, holding its name plus the
# eight values that together decide whether two objects count as duplicates.
# `name` comes first because rows are inserted positionally as
# ( name, bb_x, bb_y, bb_z, polycount, geo_x, geo_y, geo_z, vertexcount ).
initSQL = '''
CREATE TABLE `myobjects` (
    `name` TEXT,
    `bb_x` FLOAT, `bb_y` FLOAT, `bb_z` FLOAT,
    `polycount` INTEGER,
    `geo_x` FLOAT, `geo_y` FLOAT, `geo_z` FLOAT,
    `vertexcount` INTEGER
);
CREATE INDEX `myobjects_criterion_index` ON `myobjects` (
    `bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`
);
'''

# One row per distinct combination of the eight criteria that occurs more than
# once; the trailing `count` column is the number of objects sharing it.
groupQuery = '''
SELECT `bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`, COUNT( * ) `count` FROM `myobjects`
GROUP BY `bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`
HAVING `count` > 1
'''

# Names of all objects matching one combination of the eight criteria
# (bound positionally, in the same order as the groupQuery columns).
duplicatesQuery = '''
SELECT `name` FROM `myobjects`
WHERE `bb_x` = ? AND `bb_y` = ? AND `bb_z` = ? AND `polycount` = ? AND `geo_x` = ? AND `geo_y` = ? AND `geo_z` = ? AND `vertexcount` = ?
'''
_clamp = lambda p: hash ( int( p * 100 ) * .01 )
def bbox_values(obj):
    """
    Return ``(sum_x, sum_y, sum_z, count)`` describing the bounding box of *obj*.

    The flat list returned by ``cmds.xform(obj, q=True, bb=True, ws=True)`` is
    walked three values at a time; each value is passed through ``_clamp`` and
    accumulated per axis. ``count`` is the integer result of
    ``cmds.polyEvaluate(obj, t=True)``, or 1 when that result cannot be
    converted to an int.
    """
    bb = cmds.xform(obj, q=True, bb=True, ws=True)
    sum_x, sum_y, sum_z = 0, 0, 0
    for pos in range(0, len(bb), 3):
        x, y, z = [_clamp(p) for p in bb[pos:pos + 3]]
        sum_x = sum_x + x
        sum_y = sum_y + y
        sum_z = sum_z + z
    try:
        # good ol' maya - polyEvaluate returns a string instead of raising!
        count = int(cmds.polyEvaluate(obj, t=True))
    except ValueError:
        count = 1
    return sum_x, sum_y, sum_z, count
def geo_values(obj):
    """
    Return ``(sum_x, sum_y, sum_z, n)`` describing the vertices of *obj*.

    Queries the world-space translation of every vertex (``obj.vtx[*]``) as
    one flat list, walks it three values at a time, passes each value through
    ``_clamp`` and accumulates it per axis. ``n`` is the length of that flat
    list (i.e. three entries per vertex, going by the stride used here), not
    the number of vertices.
    """
    sum_x, sum_y, sum_z = 0, 0, 0
    verts = cmds.xform(obj + ".vtx[*]", q=True, t=True, ws=True)
    for pos in range(0, len(verts), 3):
        x, y, z = [_clamp(p) for p in verts[pos:pos + 3]]
        sum_x = sum_x + x
        sum_y = sum_y + y
        sum_z = sum_z + z
    return sum_x, sum_y, sum_z, len(verts)
def find_duplicates(objects):
    """
    Return groups of duplicate objects found in *objects*.

    Each object's ``bbox_values`` and ``geo_values`` are stored, together with
    the object itself as its name, in an in-memory SQLite table. Objects whose
    eight stored values are all equal are considered duplicates.

    Returns a list of lists: one inner list of names per set of duplicates,
    each containing two or more entries. Objects without a duplicate are not
    reported. An empty or None *objects* yields an empty list.
    """
    duplicates = []
    connection = sqlite3.connect(":memory:")
    try:
        cursor1 = connection.cursor()
        cursor2 = connection.cursor()
        cursor1.executescript(initSQL)

        for obj in objects or ():
            bb_x, bb_y, bb_z, polycount = bbox_values(obj)
            geo_x, geo_y, geo_z, vertexcount = geo_values(obj)
            cursor1.execute(
                "INSERT INTO `myobjects` VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ? )",
                (obj, bb_x, bb_y, bb_z, polycount, geo_x, geo_y, geo_z, vertexcount),
            )

        # cursor1 walks the duplicated criteria combinations while cursor2
        # looks up the member names, so the outer iteration is not disturbed.
        cursor1.execute(groupQuery)
        for row in cursor1:
            criteria = row[:-1]  # drop the trailing COUNT( * ) column
            cursor2.execute(duplicatesQuery, criteria)
            duplicates.append([name for (name,) in cursor2])
    finally:
        # Release the in-memory database even if a query or a Maya call fails.
        connection.close()
    return duplicates
def find_all_duplicates():
    """
    Call find_duplicates on all mesh objects in the scene.

    Lists the scene's mesh nodes, resolves their parents with
    ``cmds.listRelatives`` and passes those to ``find_duplicates``.
    Returns an empty list when the scene contains no meshes.
    """
    meshes = cmds.ls(type='mesh', l=True, ni=True)
    if not meshes:
        return []
    xforms = cmds.listRelatives(meshes, p=True, f=True)
    return find_duplicates(xforms)
This will be a bit slower because geo_values() is calculated for every object up front. But no hashing is necessary, so there is no need to worry about hash collisions, and hence it’s more accurate. It also handles more than 2 duplicate instances gracefully.
SQLite is the most suitable database for this particular case because it lives in the local process's memory. There is no need to set up a separate database server first, no inter-process or inter-machine client-server data transfer overhead, and no heavy concurrency-management overhead. It should therefore be adequately fast as well.