The second pass of the code below (marked with a comment) might be problematic:
import itertools

def find_duplicates( *geo ):
    '''
    Return a list of duplicate pairs for all the objects in the supplied list *geo
    '''
    known = {}
    dupes = []
    for item in geo:
        key = bbox_hash( item )
        if key not in known:
            known[key] = item
        else:
            dupes.append( ( known[key], item ) )
    # second pass - this is the suspect part:
    possibles = set( itertools.chain( *dupes ) )
    geo_known = {}
    dupes = []
    for item in possibles:
        key = geo_hash( item )
        if key not in geo_known:
            geo_known[key] = item
        else:
            dupes.append( ( geo_known[key], item ) )
    return dupes
If dupes were [ ( A, B ), ( C, D ) ], and geo_hash( B ) == geo_hash( C ), then the final result would be [ ( B, C ) ], even though bbox_hash( B ) != bbox_hash( C )!
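A quick way to see it (a contrived repro, with plain dicts standing in for the real bbox_hash / geo_hash helpers):

# stub hashes: bbox pairs up ( A, B ) and ( C, D ), but only B and C match on geometry
bbox_hash = { 'A': 1, 'B': 1, 'C': 2, 'D': 2 }.get
geo_hash = { 'A': 10, 'B': 20, 'C': 20, 'D': 30 }.get
print( find_duplicates( 'A', 'B', 'C', 'D' ) )  # prints [('B', 'C')] (or [('C', 'B')] - set iteration order is arbitrary)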
It can probably be replaced by something like this instead:
dupes2 = []
for object1, object2 in dupes:
    if geo_hash( object1 ) == geo_hash( object2 ):
        dupes2.append( ( object1, object2 ) )
return dupes2
But then, what if there are more than two duplicate instances, say A == B == C? More test iterations needed?
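One way to handle both issues (a sketch, untested, assuming the same bbox_hash / geo_hash helpers as above): group by bbox_hash first, then split each candidate group by geo_hash, so pairs never cross bbox groups and groups of any size fall out naturally:

from collections import defaultdict

def find_duplicate_groups( *geo ):
    '''
    Return lists of objects that match on both bbox_hash and geo_hash
    '''
    by_bbox = defaultdict( list )
    for item in geo:
        by_bbox[bbox_hash( item )].append( item )
    result = []
    for candidates in by_bbox.values():
        if len( candidates ) < 2:
            continue  # unique bounding box, can't be a duplicate
        by_geo = defaultdict( list )
        for item in candidates:
            by_geo[geo_hash( item )].append( item )
        # any group with two or more members is a duplicate set, however large
        result.extend( group for group in by_geo.values() if len( group ) > 1 )
    return result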
–
The following is a modified version of the code above using SQLite, just for illustration (untested!):
import maya.cmds as cmds
import sqlite3
initSQL = '''
CREATE TABLE `myobjects` (
`name` TEXT PRIMARY KEY,
`bb_x` FLOAT, `bb_y` FLOAT, `bb_z` FLOAT,
`polycount` INTEGER,
`geo_x` FLOAT, `geo_y` FLOAT, `geo_z` FLOAT,
`vertexcount` INTEGER
);
CREATE INDEX `myobjects_criterion_index` ON `myobjects` (
`bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`
);
'''
groupQuery = '''
SELECT `bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`, COUNT( * ) AS `count` FROM `myobjects`
GROUP BY `bb_x`, `bb_y`, `bb_z`, `polycount`, `geo_x`, `geo_y`, `geo_z`, `vertexcount`
HAVING `count` > 1
'''
duplicatesQuery = '''
SELECT `name` FROM `myobjects`
WHERE `bb_x` = ? AND `bb_y` = ? AND `bb_z` = ? AND `polycount` = ? AND `geo_x` = ? AND `geo_y` = ? AND `geo_z` = ? AND `vertexcount` = ?
'''
_clamp = lambda p: int( p * 100 ) * .01  # quantize to 2 decimals so float noise doesn't break equality - no hash() needed here
def bbox_values( obj ):
    bb = cmds.xform( obj, q=True, bb=True, ws=True )  # ( xmin, ymin, zmin, xmax, ymax, zmax )
    sum_x, sum_y, sum_z = 0, 0, 0
    for pos in range( 0, len( bb ), 3 ):
        x, y, z = [ _clamp( p ) for p in bb[pos:pos + 3] ]
        sum_x = sum_x + x
        sum_y = sum_y + y
        sum_z = sum_z + z
    try:
        count = int( cmds.polyEvaluate( obj, t=True ) )  # good ol' Maya - polyEvaluate returns a string instead of raising!
    except ValueError:
        count = 1
    return sum_x, sum_y, sum_z, count
def geo_values( obj ):
    sum_x, sum_y, sum_z = 0, 0, 0
    verts = cmds.xform( obj + ".vtx[*]", q=True, t=True, ws=True )
    for pos in range( 0, len( verts ), 3 ):
        x, y, z = [ _clamp( p ) for p in verts[pos:pos + 3] ]
        sum_x = sum_x + x
        sum_y = sum_y + y
        sum_z = sum_z + z
    return sum_x, sum_y, sum_z, len( verts ) // 3  # verts is a flat x, y, z list, so divide by 3 for the vertex count
def find_duplicates( objects ):
    duplicates = []
    connection = sqlite3.connect( ":memory:" )
    cursor1 = connection.cursor()
    cursor2 = connection.cursor()
    cursor1.executescript( initSQL )
    for obj in objects:  # 'obj' rather than 'object', to avoid shadowing the builtin
        bb_x, bb_y, bb_z, polycount = bbox_values( obj )
        geo_x, geo_y, geo_z, vertexcount = geo_values( obj )
        cursor1.execute(
            "INSERT INTO `myobjects` VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ? )",
            ( obj, bb_x, bb_y, bb_z, polycount, geo_x, geo_y, geo_z, vertexcount )
        )
    cursor1.execute( groupQuery )
    for bb_x, bb_y, bb_z, polycount, geo_x, geo_y, geo_z, vertexcount, duplicateCount in cursor1:
        group = []
        cursor2.execute( duplicatesQuery, ( bb_x, bb_y, bb_z, polycount, geo_x, geo_y, geo_z, vertexcount ) )
        for ( name, ) in cursor2:
            group.append( name )
        duplicates.append( group )
    cursor2.close()
    cursor1.close()
    connection.close()
    return duplicates
def find_all_duplicates():
    '''
    Call find_duplicates on all mesh objects in the scene
    '''
    meshes = cmds.ls( type='mesh', l=True, ni=True )
    if not meshes:
        return []
    xforms = cmds.listRelatives( meshes, p=True, f=True )
    return find_duplicates( xforms )
This will be a bit slower because geo_values() is calculated for every object upfront. But no hashing is necessary, so there is no need to worry about hash collisions, which makes it more accurate. It also handles more than two duplicate instances gracefully.
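If that upfront cost ever matters, one possible refinement (same untested caveat; the query strings and helper names below are made up for illustration) is to insert only the cheap bbox columns first, leave the geo columns NULL, and fill them in just for the bbox-level candidates before running groupQuery:

# hypothetical lazy variant - hook these in instead of the combined INSERT above
bboxGroupQuery = '''
SELECT `bb_x`, `bb_y`, `bb_z`, `polycount` FROM `myobjects`
GROUP BY `bb_x`, `bb_y`, `bb_z`, `polycount`
HAVING COUNT( * ) > 1
'''
bboxMembersQuery = '''
SELECT `name` FROM `myobjects`
WHERE `bb_x` = ? AND `bb_y` = ? AND `bb_z` = ? AND `polycount` = ?
'''

def insert_bbox_only( cursor, obj ):
    # geo_x / geo_y / geo_z / vertexcount stay NULL for now
    bb_x, bb_y, bb_z, polycount = bbox_values( obj )
    cursor.execute(
        "INSERT INTO `myobjects` ( `name`, `bb_x`, `bb_y`, `bb_z`, `polycount` ) VALUES ( ?, ?, ?, ?, ? )",
        ( obj, bb_x, bb_y, bb_z, polycount )
    )

def fill_geo_for_candidates( connection ):
    cursor1, cursor2 = connection.cursor(), connection.cursor()
    cursor1.execute( bboxGroupQuery )
    for key in cursor1.fetchall():
        cursor2.execute( bboxMembersQuery, key )
        for ( name, ) in cursor2.fetchall():
            geo_x, geo_y, geo_z, vertexcount = geo_values( name )
            cursor2.execute(
                "UPDATE `myobjects` SET `geo_x` = ?, `geo_y` = ?, `geo_z` = ?, `vertexcount` = ? WHERE `name` = ?",
                ( geo_x, geo_y, geo_z, vertexcount, name )
            )

Non-candidates keep NULL geo columns afterwards, which is harmless for groupQuery: their bbox columns are unique by definition, so they can never form a group anyway.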
–
SQLite is well suited to this particular case because the database can live entirely in local process memory: there is no separate database server to set up, no inter-process or inter-machine client-server transfer overhead, and no heavy concurrency-management overhead. It should therefore be adequately fast as well.
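For completeness, a minimal usage sketch (assumes a scene with meshes is open):

# run on the current scene and report each group of duplicate candidates
for group in find_all_duplicates():
    print( "duplicate candidates: %s" % group )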