diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index f0813fb4ee371d94058c5c3867c21200d864a77b..071a122c9536d4527277dda0e909745e31cc99e9 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -34,13 +34,15 @@
 
 #include <stdlib.h>
 
-inline void* ll_aligned_malloc(size_t size, size_t alignment = 16) // alignment MUST be power-of-two multiple of sizeof(void*).   returned hunk MUST be freed with ll_aligned_free().
+inline void* ll_aligned_malloc_16(size_t size) // allocates size bytes aligned to 16 bytes.   returned hunk MUST be freed with ll_aligned_free_16().
 {
 #if defined(LL_WINDOWS)
-	return _mm_malloc(size, alignment);
+	return _mm_malloc(size, 16);
+#elif defined(LL_DARWIN)
+	return malloc(size); // default osx malloc is 16 byte aligned.
 #else
 	void *rtn;
-	if (LL_LIKELY(0 == posix_memalign(&rtn, alignment, size)))
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) // argument order is (memptr, alignment, size); returns 0 on success
 	{
 		return rtn;
 	}
@@ -51,10 +53,12 @@ inline void* ll_aligned_malloc(size_t size, size_t alignment = 16) // alignment
 #endif
 }
 
-inline void ll_aligned_free(void *p)
+inline void ll_aligned_free_16(void *p) // releases a hunk returned by ll_aligned_malloc_16().
 {
 #if defined(LL_WINDOWS)
 	_mm_free(p);
+#elif defined(LL_DARWIN)
+	free(p); // on osx the hunk comes from plain malloc(), so plain free() releases it.
 #else
 	free(p); // posix_memalign() is compatible with heap deallocator
 #endif
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
index d0164c103385a94040220154d8073c3b45b27168..d10e4fee3a0ad3911a8512ff63d824e8a296f955 100644
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -141,7 +141,7 @@ void LLPolyMeshSharedData::freeMeshData()
 		delete [] mDetailTexCoords;
 		mDetailTexCoords = NULL;
 
-		_mm_free(mWeights);
+		ll_aligned_free_16(mWeights);
 		mWeights = NULL;
 	}
 
@@ -231,7 +231,7 @@ BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
 	mBaseBinormals = new LLVector3[ numVertices ];
 	mTexCoords = new LLVector2[ numVertices ];
 	mDetailTexCoords = new LLVector2[ numVertices ];
-	mWeights = (F32*) _mm_malloc((numVertices*sizeof(F32)+0xF) & ~0xF, 16);
+	mWeights = (F32*) ll_aligned_malloc_16((numVertices*sizeof(F32)+0xF) & ~0xF);
 	for (i = 0; i < numVertices; i++)
 	{
 		mWeights[i] = 0.f;
@@ -716,7 +716,7 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_
 		int nfloats = nverts * (2*4 + 3*3 + 2 + 4);
 
 		//use aligned vertex data to make LLPolyMesh SSE friendly
-		mVertexData = (F32*) ll_aligned_malloc(nfloats*4, 16);
+		mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4);
 		int offset = 0;
 
 		//all members must be 16-byte aligned except the last 3
@@ -767,7 +767,7 @@ LLPolyMesh::~LLPolyMesh()
 	delete [] mClothingWeights;
 	delete [] mTexCoords;
 #else
-	ll_aligned_free(mVertexData);
+	ll_aligned_free_16(mVertexData);
 #endif
 }