Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make global variables thread-safe in the extension #19

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ jobs:
matrix:
ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
os: [ ubuntu-latest, macos-latest ]

# CRuby < 2.6 does not support macos-arm64, so test those on amd64 instead
exclude:
- { os: macos-latest, ruby: '2.5' }
include:
- { os: macos-13, ruby: '2.5' }
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
Expand Down
186 changes: 93 additions & 93 deletions ext/nkf/nkf-utf8/nkf.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ struct input_code{
int _file_stat;
};

static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
static nkf_encoding *input_encoding = NULL;
static nkf_encoding *output_encoding = NULL;
static RB_THREAD_LOCAL_SPECIFIER const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
static RB_THREAD_LOCAL_SPECIFIER nkf_encoding *input_encoding = NULL;
static RB_THREAD_LOCAL_SPECIFIER nkf_encoding *output_encoding = NULL;

#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/* UCS Mapping
Expand All @@ -348,22 +348,22 @@ static nkf_encoding *output_encoding = NULL;
#define UCS_MAP_MS 1
#define UCS_MAP_CP932 2
#define UCS_MAP_CP10001 3
static int ms_ucs_map_f = UCS_MAP_ASCII;
static RB_THREAD_LOCAL_SPECIFIER int ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
/* no NEC special, NEC-selected IBM extended and IBM extended characters */
static int no_cp932ext_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int no_cp932ext_f = FALSE;
/* ignore ZERO WIDTH NO-BREAK SPACE */
static int no_best_fit_chars_f = FALSE;
static int input_endian = ENDIAN_BIG;
static int input_bom_f = FALSE;
static nkf_char unicode_subchar = '?'; /* the regular substitution character */
static void (*encode_fallback)(nkf_char c) = NULL;
static RB_THREAD_LOCAL_SPECIFIER int no_best_fit_chars_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int input_endian = ENDIAN_BIG;
static RB_THREAD_LOCAL_SPECIFIER int input_bom_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER nkf_char unicode_subchar = '?'; /* the regular substitution character */
static RB_THREAD_LOCAL_SPECIFIER void (*encode_fallback)(nkf_char c) = NULL;
static void w_status(struct input_code *, nkf_char);
#endif
#ifdef UTF8_OUTPUT_ENABLE
static int output_bom_f = FALSE;
static int output_endian = ENDIAN_BIG;
static RB_THREAD_LOCAL_SPECIFIER int output_bom_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int output_endian = ENDIAN_BIG;
#endif

static void std_putc(nkf_char c);
Expand All @@ -380,43 +380,43 @@ static void mime_putc(nkf_char c);
/* buffers */

#if !defined(PERL_XS) && !defined(WIN32DLL)
static unsigned char stdibuf[IOBUF_SIZE];
static unsigned char stdobuf[IOBUF_SIZE];
static RB_THREAD_LOCAL_SPECIFIER unsigned char stdibuf[IOBUF_SIZE];
static RB_THREAD_LOCAL_SPECIFIER unsigned char stdobuf[IOBUF_SIZE];
#endif

#define NKF_UNSPECIFIED (-TRUE)

/* flags */
static int unbuf_f = FALSE;
static int estab_f = FALSE;
static int nop_f = FALSE;
static int binmode_f = TRUE; /* binary mode */
static int rot_f = FALSE; /* rot14/43 mode */
static int hira_f = FALSE; /* hira/kata henkan */
static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
static int mime_decode_f = FALSE; /* mime decode is explicitly on */
static int mimebuf_f = FALSE; /* MIME buffered input */
static int broken_f = FALSE; /* convert ESC-less broken JIS */
static int iso8859_f = FALSE; /* ISO8859 through */
static int mimeout_f = FALSE; /* base64 mode */
static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
static RB_THREAD_LOCAL_SPECIFIER int unbuf_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int estab_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int nop_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int binmode_f = TRUE; /* binary mode */
static RB_THREAD_LOCAL_SPECIFIER int rot_f = FALSE; /* rot14/43 mode */
static RB_THREAD_LOCAL_SPECIFIER int hira_f = FALSE; /* hira/kata henkan */
static RB_THREAD_LOCAL_SPECIFIER int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
static RB_THREAD_LOCAL_SPECIFIER int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
static RB_THREAD_LOCAL_SPECIFIER int mime_decode_f = FALSE; /* mime decode is explicitly on */
static RB_THREAD_LOCAL_SPECIFIER int mimebuf_f = FALSE; /* MIME buffered input */
static RB_THREAD_LOCAL_SPECIFIER int broken_f = FALSE; /* convert ESC-less broken JIS */
static RB_THREAD_LOCAL_SPECIFIER int iso8859_f = FALSE; /* ISO8859 through */
static RB_THREAD_LOCAL_SPECIFIER int mimeout_f = FALSE; /* base64 mode */
static RB_THREAD_LOCAL_SPECIFIER int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
static RB_THREAD_LOCAL_SPECIFIER int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */

#ifdef UNICODE_NORMALIZATION
static int nfc_f = FALSE;
static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER int nfc_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif

#ifdef INPUT_OPTION
static int cap_f = FALSE;
static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER int cap_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;

static int url_f = FALSE;
static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER int url_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif

#define PREFIX_EUCG3 NKF_INT32_C(0x8F00)
Expand All @@ -434,40 +434,40 @@ static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))

#ifdef NUMCHAR_OPTION
static int numchar_f = FALSE;
static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER int numchar_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
#endif

#ifdef CHECK_OPTION
static int noout_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int noout_f = FALSE;
static void no_putc(nkf_char c);
static int debug_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int debug_f = FALSE;
static void debug(const char *str);
static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
#endif

static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
static RB_THREAD_LOCAL_SPECIFIER int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
static void set_input_codename(const char *codename);

#ifdef EXEC_IO
static int exec_f = 0;
static RB_THREAD_LOCAL_SPECIFIER int exec_f = 0;
#endif

#ifdef SHIFTJIS_CP932
/* invert IBM extended characters to others */
static int cp51932_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int cp51932_f = FALSE;

/* invert NEC-selected IBM extended characters to IBM extended characters */
static int cp932inv_f = TRUE;
static RB_THREAD_LOCAL_SPECIFIER int cp932inv_f = TRUE;

/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
#endif /* SHIFTJIS_CP932 */

static int x0212_f = FALSE;
static int x0213_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int x0212_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int x0213_f = FALSE;

static unsigned char prefix_table[256];
static RB_THREAD_LOCAL_SPECIFIER unsigned char prefix_table[256];

static void e_status(struct input_code *, nkf_char);
static void s_status(struct input_code *, nkf_char);
Expand All @@ -483,28 +483,28 @@ struct input_code input_code_list[] = {
{NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
};

static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
static int base64_count = 0;
static RB_THREAD_LOCAL_SPECIFIER int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
static RB_THREAD_LOCAL_SPECIFIER int base64_count = 0;

/* X0208 -> ASCII converter */

/* fold parameter */
static int f_line = 0; /* chars in line */
static int f_prev = 0;
static int fold_preserve_f = FALSE; /* preserve new lines */
static int fold_f = FALSE;
static int fold_len = 0;
static RB_THREAD_LOCAL_SPECIFIER int f_line = 0; /* chars in line */
static RB_THREAD_LOCAL_SPECIFIER int f_prev = 0;
static RB_THREAD_LOCAL_SPECIFIER int fold_preserve_f = FALSE; /* preserve new lines */
static RB_THREAD_LOCAL_SPECIFIER int fold_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int fold_len = 0;

/* options */
static unsigned char kanji_intro = DEFAULT_J;
static unsigned char ascii_intro = DEFAULT_R;
static RB_THREAD_LOCAL_SPECIFIER unsigned char kanji_intro = DEFAULT_J;
static RB_THREAD_LOCAL_SPECIFIER unsigned char ascii_intro = DEFAULT_R;

/* Folding */

#define FOLD_MARGIN 10
#define DEFAULT_FOLD 60

static int fold_margin = FOLD_MARGIN;
static RB_THREAD_LOCAL_SPECIFIER int fold_margin = FOLD_MARGIN;

/* process default */

Expand All @@ -522,40 +522,40 @@ no_connection(nkf_char c2, nkf_char c1)
no_connection2(c2,c1,0);
}

static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
static RB_THREAD_LOCAL_SPECIFIER void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;

static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
static RB_THREAD_LOCAL_SPECIFIER void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;

/* static redirections */

static void (*o_putc)(nkf_char c) = std_putc;
static RB_THREAD_LOCAL_SPECIFIER void (*o_putc)(nkf_char c) = std_putc;

static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;

static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;

static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
static RB_THREAD_LOCAL_SPECIFIER void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */

static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;

/* for strict mime */
static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
static RB_THREAD_LOCAL_SPECIFIER nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;

/* Global states */
static int output_mode = ASCII; /* output kanji mode */
static int input_mode = ASCII; /* input kanji mode */
static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
static RB_THREAD_LOCAL_SPECIFIER int output_mode = ASCII; /* output kanji mode */
static RB_THREAD_LOCAL_SPECIFIER int input_mode = ASCII; /* input kanji mode */
static RB_THREAD_LOCAL_SPECIFIER int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */

/* X0201 / X0208 conversion tables */

Expand Down Expand Up @@ -665,20 +665,20 @@ static const unsigned char fv[] = {



static int option_mode = 0;
static int file_out_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int option_mode = 0;
static RB_THREAD_LOCAL_SPECIFIER int file_out_f = FALSE;
#ifdef OVERWRITE
static int overwrite_f = FALSE;
static int preserve_time_f = FALSE;
static int backup_f = FALSE;
static char *backup_suffix = "";
static RB_THREAD_LOCAL_SPECIFIER int overwrite_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int preserve_time_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER int backup_f = FALSE;
static RB_THREAD_LOCAL_SPECIFIER char *backup_suffix = "";
#endif

static int eolmode_f = 0; /* CR, LF, CRLF */
static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
static nkf_char prev_cr = 0; /* CR or 0 */
static RB_THREAD_LOCAL_SPECIFIER int eolmode_f = 0; /* CR, LF, CRLF */
static RB_THREAD_LOCAL_SPECIFIER int input_eol = 0; /* 0: unestablished, EOF: MIXED */
static RB_THREAD_LOCAL_SPECIFIER nkf_char prev_cr = 0; /* CR or 0 */
#ifdef EASYWIN /*Easy Win */
static int end_check;
static RB_THREAD_LOCAL_SPECIFIER int end_check;
#endif /*Easy Win */
Copy link
Member Author

@andrykonchin andrykonchin Jul 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't thoroughly analyse which global variables are modified and which aren't, so I updated the declarations of all the static global variables.


static void *
Expand Down
29 changes: 21 additions & 8 deletions ext/nkf/nkf.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,30 @@
#undef FALSE
#define putchar(c) rb_nkf_putchar(c)

/*
 * Pick a thread-local storage specifier when RB_THREAD_LOCAL_SPECIFIER is
 * not already defined:
 *   - _Thread_local when __STDC_VERSION__ reports C11 (201112) or later;
 *   - otherwise __thread when the compiler defines __GNUC__.
 * If neither condition holds, this block leaves the macro undefined.
 */
#ifndef RB_THREAD_LOCAL_SPECIFIER
# if __STDC_VERSION__ >= 201112
# define RB_THREAD_LOCAL_SPECIFIER _Thread_local
# elif defined(__GNUC__)
/* note that ICC (linux) and Clang are covered by __GNUC__ */
# define RB_THREAD_LOCAL_SPECIFIER __thread
# endif
#endif
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic is borrowed from the thread_pthread.h file. On Ruby master it was removed in ruby/ruby@f99af43, and RB_THREAD_LOCAL_SPECIFIER is defined instead.

So on Ruby 3.4 we rely on the RB_THREAD_LOCAL_SPECIFIER that is available out of the box, and on earlier versions we fall back to the previous logic. This way the thread-local declarations should work on both existing Ruby versions and Ruby 3.4.


/*
 * Fallback: if RB_THREAD_LOCAL_SPECIFIER is still undefined at this point,
 * define it as empty so that declarations using it compile as ordinary
 * statics (i.e. without thread-local storage).
 */
#ifndef RB_THREAD_LOCAL_SPECIFIER
# define RB_THREAD_LOCAL_SPECIFIER
#endif

/* Input/Output pointers */

static unsigned char *output;
static unsigned char *input;
static int input_ctr;
static int i_len;
static int output_ctr;
static int o_len;
static int incsize;
static RB_THREAD_LOCAL_SPECIFIER unsigned char *output;
static RB_THREAD_LOCAL_SPECIFIER unsigned char *input;
static RB_THREAD_LOCAL_SPECIFIER int input_ctr;
static RB_THREAD_LOCAL_SPECIFIER int i_len;
static RB_THREAD_LOCAL_SPECIFIER int output_ctr;
static RB_THREAD_LOCAL_SPECIFIER int o_len;
static RB_THREAD_LOCAL_SPECIFIER int incsize;

static VALUE result;
/* NOTE(review): when RB_THREAD_LOCAL_SPECIFIER expands to a real thread-local
 * specifier, this VALUE lives in thread-local storage; presumably the GC needs
 * another way to see it while a conversion is in progress -- TODO confirm. */
static RB_THREAD_LOCAL_SPECIFIER VALUE result;

static int
rb_nkf_putchar(unsigned int c)
Expand Down