Fluid: Updated Mantaflow preprocessed (pp) files

Updates include:
- A fix from Jacques that changed the loop order in the mesh creation function (the fix speeds up the function significantly due to fewer cache misses).
- Some of the grid copy helper functions are now multithreaded.
- A fix for Windows file IO. It is now possible to load files whose paths contain non-ASCII characters on Windows too.
2020-02-21 15:29:32 +01:00 · 2020-02-21 15:29:32 +01:00 · b8af5e1062
parent ec5fc1adcc
commit b8af5e1062
4 changed files with 418 additions and 36 deletions
--- a/extern/mantaflow/preprocessed/fileio/ioutil.cpp
+++ b/extern/mantaflow/preprocessed/fileio/ioutil.cpp
@@ -23,21 +23,36 @@ extern "C" {
 #  include <zlib.h>
 }

+#  if defined(WIN32) || defined(_WIN32)
+#    include <windows.h>
+#    include <string>
+#  endif
+
+using namespace std;
+
 namespace Manta {

-//! helper to handle non ascii filenames correctly, mainly problematic on windows
+#  if defined(WIN32) || defined(_WIN32)
+static wstring stringToWstring(const char *str)
+{
+  const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str, strlen(str), NULL, 0);
+  wstring strWide(length_wc, 0);
+  MultiByteToWideChar(CP_UTF8, 0, str, strlen(str), &strWide[0], length_wc);
+  return strWide;
+}
+#  endif
+
 void *safeGzopen(const char *filename, const char *mode)
 {
  gzFile gzfile;
-#  if 0
-  UTF16_ENCODE(filename);

-  // gzopen_w() is supported since zlib v1.2.7
-  gzfile = gzopen_w(filename_16, mode);
-  UTF16_UN_ENCODE(filename);
+#  if defined(WIN32) || defined(_WIN32)
+  wstring filenameWide = stringToWstring(filename);
+  gzfile = gzopen_w(filenameWide.c_str(), mode);
 #  else
  gzfile = gzopen(filename, mode);
 #  endif
+
  return gzfile;
 }
 #endif
--- a/extern/mantaflow/preprocessed/gitinfo.h
+++ b/extern/mantaflow/preprocessed/gitinfo.h
@@ -1,3 +1,3 @@


-#define MANTA_GIT_VERSION "commit 7b9e0d841274c65dce911ec578bd0b4779971422"
+#define MANTA_GIT_VERSION "commit ce000bcbd7004e6549ac2f118755fcdc1f679bc3"
--- a/extern/mantaflow/preprocessed/grid.cpp
+++ b/extern/mantaflow/preprocessed/grid.cpp
@@ -1244,15 +1244,67 @@ void PbRegister_gridMaxDiffVec3()
 }
 }

+struct knCopyMacToVec3 : public KernelBase {
+  knCopyMacToVec3(MACGrid &source, Grid<Vec3> &target)
+      : KernelBase(&source, 0), source(source), target(target)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const
+  {
+    target(i, j, k) = source(i, j, k);
+  }
+  inline MACGrid &getArg0()
+  {
+    return source;
+  }
+  typedef MACGrid type0;
+  inline Grid<Vec3> &getArg1()
+  {
+    return target;
+  }
+  typedef Grid<Vec3> type1;
+  void runMessage()
+  {
+    debMsg("Executing kernel knCopyMacToVec3 ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 0; j < _maxY; j++)
+          for (int i = 0; i < _maxX; i++)
+            op(i, j, k, source, target);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 0; i < _maxX; i++)
+          op(i, j, k, source, target);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+  }
+  MACGrid &source;
+  Grid<Vec3> &target;
+};
 // simple helper functions to copy (convert) mac to vec3 , and levelset to real grids
 // (are assumed to be the same for running the test cases - in general they're not!)

 void copyMacToVec3(MACGrid &source, Grid<Vec3> &target)
 {
-  FOR_IJK(target)
-  {
-    target(i, j, k) = source(i, j, k);
-  }
+  knCopyMacToVec3(source, target);
 }
 static PyObject *_W_3(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1323,10 +1375,14 @@ void PbRegister_convertMacToVec3()
 }
 }

-//! vec3->mac grid conversion , but with full resampling
-void resampleVec3ToMac(Grid<Vec3> &source, MACGrid &target)
-{
-  FOR_IJK_BND(target, 1)
+struct knResampleVec3ToMac : public KernelBase {
+  knResampleVec3ToMac(Grid<Vec3> &source, MACGrid &target)
+      : KernelBase(&source, 1), source(source), target(target)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i, int j, int k, Grid<Vec3> &source, MACGrid &target) const
  {
    target(i, j, k)[0] = 0.5 * (source(i - 1, j, k)[0] + source(i, j, k))[0];
    target(i, j, k)[1] = 0.5 * (source(i, j - 1, k)[1] + source(i, j, k))[1];
@@ -1334,6 +1390,55 @@ void resampleVec3ToMac(Grid<Vec3> &source, MACGrid &target)
      target(i, j, k)[2] = 0.5 * (source(i, j, k - 1)[2] + source(i, j, k))[2];
    }
  }
+  inline Grid<Vec3> &getArg0()
+  {
+    return source;
+  }
+  typedef Grid<Vec3> type0;
+  inline MACGrid &getArg1()
+  {
+    return target;
+  }
+  typedef MACGrid type1;
+  void runMessage()
+  {
+    debMsg("Executing kernel knResampleVec3ToMac ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 1; j < _maxY; j++)
+          for (int i = 1; i < _maxX; i++)
+            op(i, j, k, source, target);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 1; i < _maxX; i++)
+          op(i, j, k, source, target);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
+  }
+  Grid<Vec3> &source;
+  MACGrid &target;
+};
+//! vec3->mac grid conversion , but with full resampling
+
+void resampleVec3ToMac(Grid<Vec3> &source, MACGrid &target)
+{
+  knResampleVec3ToMac(source, target);
 }
 static PyObject *_W_5(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1367,13 +1472,66 @@ void PbRegister_resampleVec3ToMac()
 }
 }

-//! mac->vec3 grid conversion , with full resampling
-void resampleMacToVec3(MACGrid &source, Grid<Vec3> &target)
-{
-  FOR_IJK_BND(target, 1)
+struct knResampleMacToVec3 : public KernelBase {
+  knResampleMacToVec3(MACGrid &source, Grid<Vec3> &target)
+      : KernelBase(&source, 1), source(source), target(target)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i, int j, int k, MACGrid &source, Grid<Vec3> &target) const
  {
    target(i, j, k) = source.getCentered(i, j, k);
  }
+  inline MACGrid &getArg0()
+  {
+    return source;
+  }
+  typedef MACGrid type0;
+  inline Grid<Vec3> &getArg1()
+  {
+    return target;
+  }
+  typedef Grid<Vec3> type1;
+  void runMessage()
+  {
+    debMsg("Executing kernel knResampleMacToVec3 ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 1; j < _maxY; j++)
+          for (int i = 1; i < _maxX; i++)
+            op(i, j, k, source, target);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 1; i < _maxX; i++)
+          op(i, j, k, source, target);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(1, maxY), *this);
+  }
+  MACGrid &source;
+  Grid<Vec3> &target;
+};
+//! mac->vec3 grid conversion , with full resampling
+
+void resampleMacToVec3(MACGrid &source, Grid<Vec3> &target)
+{
+  knResampleMacToVec3(source, target);
 }
 static PyObject *_W_6(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1407,12 +1565,65 @@ void PbRegister_resampleMacToVec3()
 }
 }

-void copyLevelsetToReal(LevelsetGrid &source, Grid<Real> &target)
-{
-  FOR_IJK(target)
+struct knCopyLevelsetToReal : public KernelBase {
+  knCopyLevelsetToReal(LevelsetGrid &source, Grid<Real> &target)
+      : KernelBase(&source, 0), source(source), target(target)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i, int j, int k, LevelsetGrid &source, Grid<Real> &target) const
  {
    target(i, j, k) = source(i, j, k);
  }
+  inline LevelsetGrid &getArg0()
+  {
+    return source;
+  }
+  typedef LevelsetGrid type0;
+  inline Grid<Real> &getArg1()
+  {
+    return target;
+  }
+  typedef Grid<Real> type1;
+  void runMessage()
+  {
+    debMsg("Executing kernel knCopyLevelsetToReal ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 0; j < _maxY; j++)
+          for (int i = 0; i < _maxX; i++)
+            op(i, j, k, source, target);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 0; i < _maxX; i++)
+          op(i, j, k, source, target);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+  }
+  LevelsetGrid &source;
+  Grid<Real> &target;
+};
+
+void copyLevelsetToReal(LevelsetGrid &source, Grid<Real> &target)
+{
+  knCopyLevelsetToReal(source, target);
 }
 static PyObject *_W_7(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1446,17 +1657,95 @@ void PbRegister_copyLevelsetToReal()
 }
 }

-void copyVec3ToReal(Grid<Vec3> &source,
-                    Grid<Real> &targetX,
-                    Grid<Real> &targetY,
-                    Grid<Real> &targetZ)
-{
-  FOR_IJK(source)
+struct knCopyVec3ToReal : public KernelBase {
+  knCopyVec3ToReal(Grid<Vec3> &source,
+                   Grid<Real> &targetX,
+                   Grid<Real> &targetY,
+                   Grid<Real> &targetZ)
+      : KernelBase(&source, 0),
+        source(source),
+        targetX(targetX),
+        targetY(targetY),
+        targetZ(targetZ)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i,
+                 int j,
+                 int k,
+                 Grid<Vec3> &source,
+                 Grid<Real> &targetX,
+                 Grid<Real> &targetY,
+                 Grid<Real> &targetZ) const
  {
    targetX(i, j, k) = source(i, j, k).x;
    targetY(i, j, k) = source(i, j, k).y;
    targetZ(i, j, k) = source(i, j, k).z;
  }
+  inline Grid<Vec3> &getArg0()
+  {
+    return source;
+  }
+  typedef Grid<Vec3> type0;
+  inline Grid<Real> &getArg1()
+  {
+    return targetX;
+  }
+  typedef Grid<Real> type1;
+  inline Grid<Real> &getArg2()
+  {
+    return targetY;
+  }
+  typedef Grid<Real> type2;
+  inline Grid<Real> &getArg3()
+  {
+    return targetZ;
+  }
+  typedef Grid<Real> type3;
+  void runMessage()
+  {
+    debMsg("Executing kernel knCopyVec3ToReal ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 0; j < _maxY; j++)
+          for (int i = 0; i < _maxX; i++)
+            op(i, j, k, source, targetX, targetY, targetZ);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 0; i < _maxX; i++)
+          op(i, j, k, source, targetX, targetY, targetZ);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+  }
+  Grid<Vec3> &source;
+  Grid<Real> &targetX;
+  Grid<Real> &targetY;
+  Grid<Real> &targetZ;
+};
+
+void copyVec3ToReal(Grid<Vec3> &source,
+                    Grid<Real> &targetX,
+                    Grid<Real> &targetY,
+                    Grid<Real> &targetZ)
+{
+  knCopyVec3ToReal(source, targetX, targetY, targetZ);
 }
 static PyObject *_W_8(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
@@ -1492,17 +1781,95 @@ void PbRegister_copyVec3ToReal()
 }
 }

-void copyRealToVec3(Grid<Real> &sourceX,
-                    Grid<Real> &sourceY,
-                    Grid<Real> &sourceZ,
-                    Grid<Vec3> &target)
-{
-  FOR_IJK(target)
+struct knCopyRealToVec3 : public KernelBase {
+  knCopyRealToVec3(Grid<Real> &sourceX,
+                   Grid<Real> &sourceY,
+                   Grid<Real> &sourceZ,
+                   Grid<Vec3> &target)
+      : KernelBase(&sourceX, 0),
+        sourceX(sourceX),
+        sourceY(sourceY),
+        sourceZ(sourceZ),
+        target(target)
+  {
+    runMessage();
+    run();
+  }
+  inline void op(int i,
+                 int j,
+                 int k,
+                 Grid<Real> &sourceX,
+                 Grid<Real> &sourceY,
+                 Grid<Real> &sourceZ,
+                 Grid<Vec3> &target) const
  {
    target(i, j, k).x = sourceX(i, j, k);
    target(i, j, k).y = sourceY(i, j, k);
    target(i, j, k).z = sourceZ(i, j, k);
  }
+  inline Grid<Real> &getArg0()
+  {
+    return sourceX;
+  }
+  typedef Grid<Real> type0;
+  inline Grid<Real> &getArg1()
+  {
+    return sourceY;
+  }
+  typedef Grid<Real> type1;
+  inline Grid<Real> &getArg2()
+  {
+    return sourceZ;
+  }
+  typedef Grid<Real> type2;
+  inline Grid<Vec3> &getArg3()
+  {
+    return target;
+  }
+  typedef Grid<Vec3> type3;
+  void runMessage()
+  {
+    debMsg("Executing kernel knCopyRealToVec3 ", 3);
+    debMsg("Kernel range"
+               << " x " << maxX << " y " << maxY << " z " << minZ << " - " << maxZ << " ",
+           4);
+  };
+  void operator()(const tbb::blocked_range<IndexInt> &__r) const
+  {
+    const int _maxX = maxX;
+    const int _maxY = maxY;
+    if (maxZ > 1) {
+      for (int k = __r.begin(); k != (int)__r.end(); k++)
+        for (int j = 0; j < _maxY; j++)
+          for (int i = 0; i < _maxX; i++)
+            op(i, j, k, sourceX, sourceY, sourceZ, target);
+    }
+    else {
+      const int k = 0;
+      for (int j = __r.begin(); j != (int)__r.end(); j++)
+        for (int i = 0; i < _maxX; i++)
+          op(i, j, k, sourceX, sourceY, sourceZ, target);
+    }
+  }
+  void run()
+  {
+    if (maxZ > 1)
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(minZ, maxZ), *this);
+    else
+      tbb::parallel_for(tbb::blocked_range<IndexInt>(0, maxY), *this);
+  }
+  Grid<Real> &sourceX;
+  Grid<Real> &sourceY;
+  Grid<Real> &sourceZ;
+  Grid<Vec3> &target;
+};
+
+void copyRealToVec3(Grid<Real> &sourceX,
+                    Grid<Real> &sourceY,
+                    Grid<Real> &sourceZ,
+                    Grid<Vec3> &target)
+{
+  knCopyRealToVec3(sourceX, sourceY, sourceZ, target);
 }
 static PyObject *_W_9(PyObject *_self, PyObject *_linargs, PyObject *_kwds)
 {
--- a/extern/mantaflow/preprocessed/levelset.cpp
+++ b/extern/mantaflow/preprocessed/levelset.cpp
@@ -773,9 +773,9 @@ void LevelsetGrid::createMesh(Mesh &mesh)
  Grid<int> edgeVY(mParent);
  Grid<int> edgeVZ(mParent);

-  for (int i = 0; i < mSize.x - 1; i++)
+  for (int k = 0; k < mSize.z - 1; k++)
    for (int j = 0; j < mSize.y - 1; j++)
-      for (int k = 0; k < mSize.z - 1; k++) {
+      for (int i = 0; i < mSize.x - 1; i++) {
        Real value[8] = {get(i, j, k),
                         get(i + 1, j, k),
                         get(i + 1, j + 1, k),