Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: read_ply replace pandas.read_csv engine=python with c; improve read_off header-parsing robustness #352

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 48 additions & 27 deletions pyntcloud/io/off.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,83 @@
import pandas as pd
import re

import numpy as np
import pandas as pd


def read_off(filename):
    """Read an OFF (Object File Format) file into pandas DataFrames.

    Parameters
    ----------
    filename : str or path-like
        Path of the OFF file to read.

    Returns
    -------
    data : dict
        ``data["points"]`` is a DataFrame with one row per vertex and the
        float32 columns ``x``, ``y``, ``z`` (plus uint8 ``red``, ``green``,
        ``blue`` when the first line contains a "C").
        ``data["mesh"]`` is a DataFrame with one row per face and the
        columns ``v1``, ``v2``, ``v3`` taken from fields 1-3 of each face
        line (field 0, the per-face vertex count, is dropped).

    Raises
    ------
    ValueError
        If the first line does not contain "OFF", if no line holding the
        element counts is found, if the header declares zero points, or if
        the number of rows read differs from the counts in the header.
    """
    with open(filename) as f:
        first_line = f.readline()
        if "OFF" not in first_line:
            raise ValueError("The file does not start with the word OFF")
        has_color = "C" in first_line

        n_points = None
        n_faces = None
        # Number of raw lines consumed up to and including the counts line.
        n_header = 1

        # Account for faulty headers, e.g. "OFF4 4 0", where the counts
        # share a line with the keyword. The counts are parsed straight
        # from the string: seeking a text-mode file to a computed character
        # offset is not a defined operation.
        m = re.match(r"^(?P<prefix>\D+)(?P<counts>[\d\s]+)$", first_line)
        if m:
            tokens = m.group("counts").split()
            if len(tokens) > 1:
                n_points = int(tokens[0])
                n_faces = int(tokens[1])

        # Read header: skip comments and short lines until the counts line.
        if n_points is None:
            for line in f:
                n_header += 1
                if line.startswith("#"):
                    continue
                tokens = line.split()
                if len(tokens) <= 1:
                    continue
                n_points = int(tokens[0])
                n_faces = int(tokens[1])
                break

        if n_points is None:
            raise ValueError("The file does not contain a valid header")

        if n_points == 0:
            raise ValueError("The file contains no points")

        data = {}
        point_names = ["x", "y", "z"]
        point_types = {"x": np.float32, "y": np.float32, "z": np.float32}

        if has_color:
            point_names.extend(["red", "green", "blue"])
            color_point_types = {
                "red": np.uint8,
                "green": np.uint8,
                "blue": np.uint8,
            }
            point_types = {**point_types, **color_point_types}

        # The handle is positioned just after the counts line, so the
        # vertex rows come next.
        data["points"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            nrows=n_points,
            names=point_names,
            dtype=point_types,
            index_col=False,
            comment="#",
        )

        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(data["points"]) != n_points:
            raise ValueError(
                f"Expected {n_points} points but read {len(data['points'])}"
            )

        # The parser reads ahead in chunks, so the handle position after
        # the first read is not reliable; rewind and skip by line count.
        f.seek(0)

        data["mesh"] = pd.read_csv(
            f,
            sep=" ",
            header=None,
            engine="c",
            skiprows=n_header + n_points,
            nrows=n_faces,
            usecols=[1, 2, 3],
            names=["v1", "v2", "v3"],
            comment="#",
        )

        if len(data["mesh"]) != n_faces:
            raise ValueError(
                f"Expected {n_faces} faces but read {len(data['mesh'])}"
            )

    return data
27 changes: 23 additions & 4 deletions pyntcloud/io/ply.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import numpy as np
import pandas as pd
from collections import defaultdict
from contextlib import contextmanager
from io import StringIO
from itertools import islice


sys_byteorder = ('>', '<')[sys.byteorder == 'little']

Expand Down Expand Up @@ -132,8 +136,13 @@ def read_ply(filename, allow_bool=False):

names = [x[0] for x in dtypes["vertex"]]

data["points"] = pd.read_csv(filename, sep=" ", header=None, engine="python",
skiprows=top, skipfooter=bottom, usecols=names, names=names)
with open(filename, 'r') as f:
lines = f.readlines()

with _file_from_lines(lines, top, len(lines) - bottom) as f:
data["points"] = pd.read_csv(
f, sep=" ", header=None, usecols=names, names=names
)

for n, col in enumerate(data["points"].columns):
data["points"][col] = data["points"][col].astype(
Expand All @@ -146,8 +155,10 @@ def read_ply(filename, allow_bool=False):
usecols = [1, 2, 3, 5, 6, 7, 8, 9, 10] if has_texture else [1, 2, 3]
names = names[usecols]

data["mesh"] = pd.read_csv(
filename, sep=" ", header=None, engine="python", skiprows=top, usecols=usecols, names=names)
with _file_from_lines(lines, top) as f:
data["mesh"] = pd.read_csv(
f, sep=" ", header=None, usecols=usecols, names=names
)

for n, col in enumerate(data["mesh"].columns):
data["mesh"][col] = data["mesh"][col].astype(
Expand Down Expand Up @@ -261,3 +272,11 @@ def describe_element(name, df):
element.append('property ' + f + ' ' + df.columns.values[i])

return element


@contextmanager
def _file_from_lines(lines, start=None, stop=None):
with StringIO() as f:
f.writelines("".join(islice(lines, start, stop)))
f.seek(0)
yield f