Skip to content

Commit

Permalink
Merge pull request #57 from Roche/dev
Browse files Browse the repository at this point in the history
fixed issue with character columns with missing values
  • Loading branch information
ofajardo authored Apr 22, 2020
2 parents 9abcf71 + 8d270d6 commit 8ac732a
Show file tree
Hide file tree
Showing 14 changed files with 1,664 additions and 1,264 deletions.
3 changes: 3 additions & 0 deletions change_log.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# 0.3.2 (github, pypi and conda 20200422)
* fixed bug when writing string columns with missing values (#54 and #55)

# 0.3.1 (github, pypi and conda 20200406)
* Updated Readstat to commit a71b4e80dae729e236d63d42066e79ca646cd23b (on dev)
* Updated Readstat update fixes #3
Expand Down
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/_build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 69b5999a1a0ed1969de459deea5093e3
config: 3c3bb2a4bec2d6479578309a09db4796
tags: 645f666f9bcd5a90fca523b33c5a78b7
2 changes: 1 addition & 1 deletion docs/_build/html/_static/documentation_options.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
VERSION: '0.3.1',
VERSION: '0.3.2',
LANGUAGE: 'None',
COLLAPSE_INDEX: false,
BUILDER: 'html',
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Index &mdash; pyreadstat 0.3.1 documentation</title>
<title>Index &mdash; pyreadstat 0.3.2 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Welcome to pyreadstat’s documentation! &mdash; pyreadstat 0.3.1 documentation</title>
<title>Welcome to pyreadstat’s documentation! &mdash; pyreadstat 0.3.2 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/py-modindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Python Module Index &mdash; pyreadstat 0.3.1 documentation</title>
<title>Python Module Index &mdash; pyreadstat 0.3.2 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/search.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Search &mdash; pyreadstat 0.3.1 documentation</title>
<title>Search &mdash; pyreadstat 0.3.2 documentation</title>



Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.3.1'
release = '0.3.2'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pyreadstat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
from .pyreadstat import read_file_in_chunks
from ._readstat_parser import ReadstatError, metadata_container

__version__ = "0.3.1"
__version__ = "0.3.2"
2,825 changes: 1,576 additions & 1,249 deletions pyreadstat/_readstat_writer.c

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions pyreadstat/_readstat_writer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,11 @@ cdef list get_pandas_column_types(object df, dict missing_user_values):
elif curtype == np.bool:
result.append((PYWRITER_LOGICAL, 0, is_missing))
elif curtype == str:
max_length = get_pandas_str_series_max_length(curseries)
if is_missing:
col = curseries.dropna().reset_index(drop=True)
max_length = get_pandas_str_series_max_length(col)
else:
max_length = get_pandas_str_series_max_length(curseries)
result.append((PYWRITER_CHARACTER, max_length, is_missing))
elif curtype == datetime.date:
result.append((PYWRITER_DATE, 0, is_missing))
Expand All @@ -255,7 +259,12 @@ cdef list get_pandas_column_types(object df, dict missing_user_values):
elif curtype == datetime.time:
result.append((PYWRITER_TIME, 0, is_missing))
else:
max_length = get_pandas_str_series_max_length(curseries.astype(str))
curseries = curseries.astype(str)
if is_missing:
col = curseries.dropna().reset_index(drop=True)
max_length = get_pandas_str_series_max_length(col.astype(str))
else:
max_length = get_pandas_str_series_max_length(curseries.astype(str))
result.append((PYWRITER_OBJECT, max_length, is_missing))

else:
Expand Down Expand Up @@ -609,6 +618,10 @@ cdef int run_write(df, str filename_path, dst_file_format file_format, str file_
#if file_format == FILE_FORMAT_XPORT and curtype == PYWRITER_DOUBLE:
# max_length = 8
variable_name = col_names[col_indx]
if type(variable_name) != str:
raise PyreadstatError("variable name %s is of type %s and it must be str (not starting with numbers!)" % (variable_name, str(type(variable_name))))
if not variable_name[0].isalpha():
raise PyreadstatError("variable name %s starts with an illegal (non-alphabetic) character" % variable_name)
variable = readstat_add_variable(writer, variable_name.encode("utf-8"), pandas_to_readstat_types[curtype], max_length)
if curtype in pyrwriter_datetimelike_types:
curformat = get_datetimelike_format_for_readstat(file_format, curtype)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@

setup(
name='pyreadstat',
version='0.3.1',
version='0.3.2',
description=short_description,
author="Otto Fajardo",
author_email="pleasecontactviagithub@notvalid.com",
Expand Down
65 changes: 61 additions & 4 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# limitations under the License.
# #############################################################################

from datetime import datetime
from datetime import datetime, timedelta
import unittest
import os
import sys
Expand Down Expand Up @@ -120,7 +120,9 @@ def _prepare_data(self):
df_nodates_sastata["myord"] = df_nodates_sastata["myord"].astype(float)
df_nodates_sastata["mylabl"] = df_nodates_sastata["mylabl"].astype(float)
self.df_nodates_sastata = df_nodates_sastata


# character column with NaN and an object column with NaN (the object is of a type the pyreadstat writer doesn't know how to handle)
self.df_charnan = pd.DataFrame([[0,np.nan,np.nan],[1,"test", timedelta]], columns = ["integer", "string", "object"])

def setUp(self):

Expand Down Expand Up @@ -717,7 +719,7 @@ def test_zsav_write_dates(self):
#if sys.version_info[0] < 3:
# return

path = os.path.join(self.write_folder, "dates_write.sav")
path = os.path.join(self.write_folder, "dates_write_zsav.sav")
pyreadstat.write_sav(self.df_sas_dates, path, compress=True)
df, meta = pyreadstat.read_sav(path)
self.assertTrue(df.equals(self.df_sas_dates))
Expand All @@ -741,7 +743,62 @@ def test_xport_write_dates(self):
pyreadstat.write_xport(self.df_sas_dates, path)
df, meta = pyreadstat.read_xport(path)
self.assertTrue(df.equals(self.df_sas_dates))


def test_sav_write_charnan(self):
    """Round-trip the NaN-bearing character/object frame through a SAV file.

    Writes ``self.df_charnan`` with ``pyreadstat.write_sav``, reads it back
    with ``pyreadstat.read_sav`` and compares the result against an expected
    frame in which the missing cells of the string and object columns are
    empty strings, the integer column is float and the object column has
    been cast to ``str``.
    """
    path = os.path.join(self.write_folder, "charnan.sav")
    pyreadstat.write_sav(self.df_charnan, path)
    df, _ = pyreadstat.read_sav(path)
    # Build the expectation on a copy: assigning through an alias would
    # silently modify the shared fixture held on the test instance.
    df2 = self.df_charnan.copy()
    df2.iloc[0, 1] = ""
    df2.iloc[0, 2] = ""
    df2['integer'] = df2["integer"].astype(float)
    df2['object'] = df2['object'].astype(str)
    self.assertTrue(df2.equals(df))

def test_zsav_write_charnan(self):
    """Round-trip the NaN-bearing character/object frame through a compressed SAV file.

    Same check as the plain SAV variant, but the file is written with
    ``compress=True``. The expected frame has empty strings in place of the
    missing string/object cells, a float integer column and the object
    column cast to ``str``.
    """
    path = os.path.join(self.write_folder, "charnan_zsav.sav")
    pyreadstat.write_sav(self.df_charnan, path, compress=True)
    df, _ = pyreadstat.read_sav(path)
    # Build the expectation on a copy: assigning through an alias would
    # silently modify the shared fixture held on the test instance.
    df2 = self.df_charnan.copy()
    df2.iloc[0, 1] = ""
    df2.iloc[0, 2] = ""
    df2['integer'] = df2["integer"].astype(float)
    df2['object'] = df2['object'].astype(str)
    self.assertTrue(df2.equals(df))

def test_xport_write_charnan(self):
    """Round-trip the NaN-bearing character/object frame through an XPORT file.

    Writes ``self.df_charnan`` with ``pyreadstat.write_xport``, reads it back
    with ``pyreadstat.read_xport`` and compares the result against an
    expected frame in which the missing string/object cells are empty
    strings, the integer column is float and the object column has been
    cast to ``str``.
    """
    path = os.path.join(self.write_folder, "charnan.xpt")
    pyreadstat.write_xport(self.df_charnan, path)
    df, _ = pyreadstat.read_xport(path)
    # Build the expectation on a copy: assigning through an alias would
    # silently modify the shared fixture held on the test instance.
    df2 = self.df_charnan.copy()
    df2.iloc[0, 1] = ""
    df2.iloc[0, 2] = ""
    df2['integer'] = df2["integer"].astype(float)
    df2['object'] = df2['object'].astype(str)
    self.assertTrue(df2.equals(df))

def test_por_write_charnan(self):
    """Round-trip the NaN-bearing character/object frame through a POR file.

    Writes ``self.df_charnan`` with ``pyreadstat.write_por``, reads it back
    with ``pyreadstat.read_por``, lower-cases the returned column names and
    compares the result against an expected frame in which the missing
    string/object cells are empty strings, the integer column is float and
    the object column has been cast to ``str``.
    """
    # NOTE(review): the "_zsav" in this file name looks like a copy-paste
    # leftover; kept unchanged so the written file name stays the same.
    path = os.path.join(self.write_folder, "charnan_zsav.por")
    pyreadstat.write_por(self.df_charnan, path)
    df, _ = pyreadstat.read_por(path)
    # Normalise column-name case before comparing with the lower-case
    # fixture (presumably POR names come back upper-cased -- TODO confirm).
    df.columns = [x.lower() for x in df.columns]
    # Build the expectation on a copy: assigning through an alias would
    # silently modify the shared fixture held on the test instance.
    df2 = self.df_charnan.copy()
    df2.iloc[0, 1] = ""
    df2.iloc[0, 2] = ""
    df2['integer'] = df2["integer"].astype(float)
    df2['object'] = df2['object'].astype(str)
    self.assertTrue(df2.equals(df))

def test_dta_write_charnan(self):
    """Round-trip the NaN-bearing character/object frame through a DTA file.

    Writes ``self.df_charnan`` with ``pyreadstat.write_dta``, reads it back
    with ``pyreadstat.read_dta`` and compares the result against an expected
    frame in which the missing string/object cells are empty strings, the
    integer column is float and the object column has been cast to ``str``.
    """
    path = os.path.join(self.write_folder, "charnan.dta")
    pyreadstat.write_dta(self.df_charnan, path)
    df, _ = pyreadstat.read_dta(path)
    # Build the expectation on a copy: assigning through an alias would
    # silently modify the shared fixture held on the test instance.
    df2 = self.df_charnan.copy()
    df2.iloc[0, 1] = ""
    df2.iloc[0, 2] = ""
    df2['integer'] = df2["integer"].astype(float)
    df2['object'] = df2['object'].astype(str)
    self.assertTrue(df2.equals(df))

if __name__ == '__main__':

Expand Down

0 comments on commit 8ac732a

Please sign in to comment.