Improve the detection of large integers

mwouts · Jun 24, 2024 · a2044f4
1 parent d92fbab
commit a2044f4
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 5 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,6 +1,13 @@
 ITables ChangeLog
 =================
 
+2.1.3 (2024-06-22)
+------------------
+
+**Fixed**
+- We have improved the detection of large integers: the long decimal part of a float is no longer mistaken for a large integer, an issue seen with Polars DataFrames ([#291](https://github.com/mwouts/itables/issues/291))
+
+
 2.1.2 (2024-06-19)
 ------------------
 
@@ -9,7 +16,7 @@ ITables ChangeLog
 an automatic horizontal scrolling in Jupyter, Jupyter Book and also Streamlit if the table is too wide ([#282](https://github.com/mwouts/itables/pull/282)).
 
 **Fixed**
-- The dependencies of the streamlit components have been updated to fix a vulnerability in `ws` ([Alert #1](https://github.com/mwouts/itables/security/dependabot/1))
+- The dependencies of the streamlit components have been updated to fix a vulnerability in `ws` ([Alert 1](https://github.com/mwouts/itables/security/dependabot/1))
 
 
 2.1.1 (2024-06-08)

diff --git a/src/itables/datatables_format.py b/src/itables/datatables_format.py
@@ -129,7 +129,15 @@ def datatables_rows(df, count=None, warn_on_unexpected_types=False, pure_json=Fa
 def n_suffix_for_bigints(js, pure_json=False):
     def n_suffix(matchobj):
         if pure_json:
-            return '"' + matchobj.group(1) + '"' + matchobj.group(2)
-        return 'BigInt("' + matchobj.group(1) + '")' + matchobj.group(2)
+            return matchobj.group(1) + '"' + matchobj.group(2) + '"' + matchobj.group(3)
+        return (
+            matchobj.group(1)
+            + 'BigInt("'
+            + matchobj.group(2)
+            + '")'
+            + matchobj.group(3)
+        )
 
-    return re.sub(r"(-?\d{16,})(,|])", n_suffix, js)
+    big_int_re = re.compile(r"^([\[\s]+)(-?\d{16,})(\]*)$")
+    parts = js.split(",")
+    return ",".join(re.sub(big_int_re, n_suffix, part) for part in parts)
diff --git a/src/itables/version.py b/src/itables/version.py
@@ -1,3 +1,3 @@
 """ITables' version number"""
 
-__version__ = "2.1.2"
+__version__ = "2.1.3"
diff --git a/tests/test_datatables_format.py b/tests/test_datatables_format.py
@@ -154,3 +154,19 @@ def test_encode_max_int(large):
 def test_encode_not_max_int(large):
     large //= 10
     assert n_suffix_for_bigints(json.dumps([large])) == "[{}]".format(large)
+
+
+def test_encode_mixed_contents():
+    # Make sure that the bigint escape works for mixed content (issue #291)
+    df = pd.DataFrame(
+        {
+            "bigint": [1666767918216000000],
+            "int": [1699300000000],
+            "float": [0.9510565400123596],
+            "neg": [-0.30901700258255005],
+        }
+    )
+    assert (
+        datatables_rows(df)
+        == '[[BigInt("1666767918216000000"), 1699300000000, 0.951057, -0.309017]]'
+    )
diff --git a/tests/test_polars.py b/tests/test_polars.py
@@ -1,6 +1,7 @@
 import pytest
 
 from itables import to_html_datatable
+from itables.javascript import datatables_rows
 from itables.sample_dfs import get_dict_of_test_dfs, get_dict_of_test_series
 
 try:
@@ -21,3 +22,19 @@ def test_show_polars_series(name, x, use_to_html):
 )
 def test_show_polars_df(name, df, use_to_html):
     to_html_datatable(df, use_to_html)
+
+
+def test_encode_mixed_contents():
+    # Make sure that the bigint escape works for mixed content (issue #291)
+    df = polars.DataFrame(
+        {
+            "bigint": [1666767918216000000],
+            "int": [1699300000000],
+            "float": [0.9510565400123596],
+            "neg": [-0.30901700258255005],
+        }
+    )
+    assert (
+        datatables_rows(df)
+        == '[[BigInt("1666767918216000000"), 1699300000000, 0.9510565400123596, -0.30901700258255005]]'
+    )