/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1997--2021 The R Core Team * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ * * EXPORTS: * * OneIndex() -- used for "[[<-" in ./subassign.c * get1index() -- used for "[[" in ./subassign.c & subset.c * vectorIndex() -- used for "[[" and "[[<-" with a vector arg * mat2indsub() -- for "mat[i]" " " " * strmat2intmat() -- for "mat[]" " " " * makeSubscript() -- for "[" and "[<-" in ./subset.c and ./subassign.c, * and "[[<-" with a scalar in ./subassign.c * arraySubscript() -- for "[i,j,..." and "[<-..." in ./subset.c, ./subassign.c */ #ifdef HAVE_CONFIG_H #include #endif #include #include /* interval at which to check interrupts, a guess (~subsecond on current hw) */ #define NINTERRUPT 10000000 /* We might get a call with R_NilValue from subassignment code */ #define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy); #define ECALL3(call, yy, A) if(call == R_NilValue) error(yy, A); else errorcall(call, yy, A); #define ECALL4(call, yy, A, B) if(call == R_NilValue) error(yy, A, B); else errorcall(call, yy, A, B); static void NORET ECALL_OutOfBounds(SEXP x, int subscript, R_xlen_t index, SEXP call) { if (call == R_NilValue) call = R_CurrentExpression; /* default behaves like error() */ SEXP sindex = ScalarReal((double) index); PROTECT(sindex); SEXP cond = R_makeOutOfBoundsError(x, subscript, sindex, call, NULL); PROTECT(cond); R_signalErrorCondition(cond, call); UNPROTECT(2); /* sindex, cond; not reached */ } static void NORET ECALL_OutOfBoundsCHAR(SEXP x, int subscript, SEXP sindex, SEXP call) { if (call == R_NilValue) call = R_CurrentExpression; /* default behaves like error() */ sindex = ScalarString(sindex); PROTECT(sindex); SEXP cond = R_makeOutOfBoundsError(x, subscript, sindex, call, NULL); PROTECT(cond); R_signalErrorCondition(cond, call); UNPROTECT(2); /* sindex, cond; not reached */ } /* This allows for the unusual case where x is of length 2, and x[[-m]] selects one element for m = 1, 2. So 'len' is only used if it is 2 and i is negative. */ static R_INLINE int integerOneIndex(int i, R_xlen_t len, SEXP call) { int indx = -1; if (i > 0) /* a regular 1-based index from R */ indx = i - 1; else if (i == 0 || len < 2) { ECALL3(call, _("attempt to select less than one element in %s"), "integerOneIndex"); } else if (len == 2 && i > -3) indx = 2 + i; else { ECALL3(call, _("attempt to select more than one element in %s"), "integerOneIndex"); } return indx; } /* Utility used (only in) do_subassign2_dflt(), i.e. "[[<-" in ./subassign.c : */ R_xlen_t attribute_hidden OneIndex(SEXP x, SEXP s, R_xlen_t nx, int partial, SEXP *newname, int pos, SEXP call) { SEXP names; R_xlen_t i, indx; const void *vmax; if (pos < 0 && length(s) > 1) { ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex"); } if (pos < 0 && length(s) < 1) { ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex"); } if(pos < 0) pos = 0; indx = -1; *newname = R_NilValue; switch(TYPEOF(s)) { case LGLSXP: case INTSXP: indx = integerOneIndex(INTEGER_ELT(s, pos), nx, call); break; case REALSXP: { double dblind = REAL_ELT(s, pos); if(!ISNAN(dblind)) { /* see comment above integerOneIndex */ if (dblind >= 1) indx = (R_xlen_t)(dblind - 1); else if (dblind > -1 || nx < 2) { ECALL3(call, _("attempt to select less than one element in %s"), "OneIndex "); } else if (nx == 2 && dblind > -3) indx = 2 + (R_xlen_t) dblind; else { ECALL3(call, _("attempt to select more than one element in %s"), "OneIndex "); } } break; } case STRSXP: vmax = vmaxget(); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { PROTECT(names); /* Try for exact match */ for (i = 0; i < nx; i++) { const char *tmp = translateChar(STRING_ELT(names, i)); if (!tmp[0]) continue; if (streql(tmp, translateChar(STRING_ELT(s, pos)))) { indx = i; break; } } // Try for partial match -- not ever used in current R (partial is 0) if (partial && indx < 0) { size_t l = strlen(translateChar(STRING_ELT(s, pos))); for(i = 0; i < nx; i++) { const char *tmp = translateChar(STRING_ELT(names, i)); if (!tmp[0]) continue; if(!strncmp(tmp, translateChar(STRING_ELT(s, pos)), l)) { if(indx == -1 ) indx = i; else indx = -2; } } } UNPROTECT(1); /* names */ } if (indx == -1) indx = nx; *newname = STRING_ELT(s, pos); vmaxset(vmax); break; case SYMSXP: vmax = vmaxget(); names = getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { PROTECT(names); for (i = 0; i < nx; i++) if (streql(translateChar(STRING_ELT(names, i)), translateChar(PRINTNAME(s)))) { indx = i; break; } UNPROTECT(1); /* names */ } if (indx == -1) indx = nx; *newname = PRINTNAME(s); vmaxset(vmax); break; default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return indx; } /* used here and in subset.c and subassign.c */ R_xlen_t attribute_hidden get1index(SEXP s, SEXP names, R_xlen_t len, int pok, int pos, SEXP call) { /* Get a single index for the [[ and [[<- operators. Checks that only one index is being selected. Returns -1 for no match. s is the subscript len is the length of the object or dimension, with names its (dim)names. pos is len-1 or -1 for [[, -1 for [[<- -1 means use the only element of length-1 s. pok : is "partial ok" ? if pok is -1, warn if partial matching occurs, but allow. */ const char *ss, *cur_name; const void *vmax; int warn_pok = (pok == -1); if (warn_pok) pok = 1; if (pos < 0 && length(s) != 1) { if (length(s) > 1) { ECALL3(call, _("attempt to select more than one element in %s"), "get1index"); } else { ECALL3(call, _("attempt to select less than one element in %s"), "get1index"); } } else if(pos >= length(s)) { ECALL(call, _("internal error in use of recursive indexing")); } if(pos < 0) pos = 0; R_xlen_t indx = -1; switch (TYPEOF(s)) { case LGLSXP: case INTSXP: { int i = INTEGER_ELT(s, pos); if (i != NA_INTEGER) indx = integerOneIndex(i, len, call); break; } case REALSXP: { double dblind = REAL_ELT(s, pos); if(!ISNAN(dblind)) { /* see comment above integerOneIndex */ if (dblind >= 1) { if(R_FINITE(dblind)) indx = (R_xlen_t)(dblind - 1); } else if (dblind > -1 || len < 2) { ECALL3(call, (dblind <= -1) ? _("invalid negative subscript in %s") : _("attempt to select less than one element in %s"), "get1index "); } else if (len == 2 && dblind > -3) // dblind = -2 or -1 {why exception ?} indx = 2 + (R_xlen_t) dblind; else { ECALL3(call, (dblind <= -1) ? _("invalid negative subscript in %s") : _("attempt to select more than one element in %s"), "get1index "); } } break; } case STRSXP: /* NA matches nothing */ if(STRING_ELT(s, pos) == NA_STRING) break; /* "" matches nothing: see names.Rd */ if(!CHAR(STRING_ELT(s, pos))[0]) break; /* Try for exact match */ vmax = vmaxget(); ss = translateChar(STRING_ELT(s, pos)); for (R_xlen_t i = 0; i < xlength(names); i++) if (STRING_ELT(names, i) != NA_STRING) { if (streql(translateChar(STRING_ELT(names, i)), ss)) { indx = i; break; } } /* Try for partial match */ if (pok && indx < 0) { size_t len = strlen(ss); for(R_xlen_t i = 0; i < xlength(names); i++) { if (STRING_ELT(names, i) != NA_STRING) { cur_name = translateChar(STRING_ELT(names, i)); if(!strncmp(cur_name, ss, len)) { if(indx == -1) {/* first one */ indx = i; if (warn_pok) { if (call == R_NilValue) warning(_("partial match of '%s' to '%s'"), ss, cur_name); else warningcall(call, _("partial match of '%s' to '%s'"), ss, cur_name); } } else { indx = -2;/* more than one partial match */ if (warn_pok) /* already given context */ warningcall(R_NilValue, _("further partial match of '%s' to '%s'"), ss, cur_name); break; } } } } } vmaxset(vmax); break; case SYMSXP: vmax = vmaxget(); for (R_xlen_t i = 0; i < xlength(names); i++) if (STRING_ELT(names, i) != NA_STRING && streql(translateChar(STRING_ELT(names, i)), CHAR(PRINTNAME(s)))) { indx = i; vmaxset(vmax); break; } break; default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return indx; } /* This is used for [[ and [[<- with a vector of indices of length > 1 . x is a list or pairlist, and it is indexed recusively from level start to level stop-1. ( 0...len-1 or 0..len-2 then len-1). For [[<- it needs to duplicate if substructure might be shared. */ SEXP attribute_hidden vectorIndex(SEXP x, SEXP thesub, int start, int stop, int pok, SEXP call, Rboolean dup) { int i; R_xlen_t offset; SEXP cx; /* sanity check */ if (dup && MAYBE_SHARED(x)) error("should only be called in an assignment context."); for(i = start; i < stop; i++) { if(!isVectorList(x) && !isPairList(x)) { if (i) errorcall(call, _("recursive indexing failed at level %d\n"), i+1); else errorcall(call, _("attempt to select more than one element in %s"), "vectorIndex"); } PROTECT(x); SEXP names = PROTECT(getAttrib(x, R_NamesSymbol)); offset = get1index(thesub, names, xlength(x), pok, i, call); UNPROTECT(2); /* x, names */ if(offset < 0 || offset >= xlength(x)) errorcall(call, _("no such index at level %d\n"), i+1); if(isPairList(x)) { #ifdef LONG_VECTOR_SUPPORT if (offset > R_SHORT_LEN_MAX) error("invalid subscript for pairlist"); #endif cx = nthcdr(x, (int) offset); RAISE_NAMED(CAR(x), NAMED(x)); x = CAR(cx); if (dup && MAYBE_SHARED(x)) { PROTECT(cx); x = shallow_duplicate(x); SETCAR(cx, x); UNPROTECT(1); /* cx */ } } else { cx = x; x = VECTOR_ELT(x, offset); RAISE_NAMED(x, NAMED(cx)); if (dup && MAYBE_SHARED(x)) { PROTECT(cx); x = shallow_duplicate(x); SET_VECTOR_ELT(cx, offset, x); UNPROTECT(1); /* cx */ } } } return x; } /* Special Matrix Subscripting: Handles the case x[i] where x is an n-way array and i is a matrix with n columns. This code returns a vector containing the subscripts to be extracted when x is regarded as unravelled. Negative indices are not allowed. A zero/NA anywhere in a row will cause a zero/NA in the same position in the result. */ /* Called from `[` VectorSubset() ./subset.c and `[<-` VectorSubassign() ./subassign.c only if(isMatrix(s) && isArray(x) && ncols(s) == "length(dim(x))"), where dims = getAttrib(x, R_DimSymbol); */ SEXP attribute_hidden mat2indsub(SEXP dims, SEXP s, SEXP call, SEXP x) { int nrs = nrows(s); R_xlen_t NR = nrs; SEXP rvec; int ndim = LENGTH(dims); const int *pdims = INTEGER_RO(dims); if (ncols(s) != ndim) { ECALL(call, _("incorrect number of columns in matrix subscript")); } #ifdef LONG_VECTOR_SUPPORT /* Check if it is a long vector we need to index */ R_xlen_t len = 1; for (int j = 0; j < ndim; j++) len *= pdims[j]; if(len > R_SHORT_LEN_MAX) { PROTECT(rvec = allocVector(REALSXP, nrs)); double *rv = REAL(rvec); for (int i = 0; i < nrs; i++) rv[i] = 1.; // 1-based. if (TYPEOF(s) == REALSXP) { for (int i = 0; i < nrs; i++) { R_xlen_t tdim = 1; const double *ps = REAL_RO(s); for (int j = 0; j < ndim; j++) { double k = ps[i + j * NR]; if(ISNAN(k)) {rv[i] = NA_REAL; break;} k = trunc(k); // = "as.integer(k)" for large k if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0.) {rv[i] = 0.; break;} if (k > pdims[j]) { ECALL_OutOfBounds(x, j, (R_xlen_t)k, call); } rv[i] += (k - 1.) * tdim; tdim *= pdims[j]; } } } else { s = coerceVector(s, INTSXP); const int *ps = INTEGER_RO(s); for (int i = 0; i < nrs; i++) { R_xlen_t tdim = 1; for (int j = 0; j < ndim; j++) { int k = ps[i + j * NR]; if(k == NA_INTEGER) {rv[i] = NA_REAL; break;} if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0) {rv[i] = 0.; break;} if (k > pdims[j]) { ECALL_OutOfBounds(x, j, k, call); } rv[i] += (double) ((k - 1) * tdim); tdim *= pdims[j]; } } } } else #endif { PROTECT(rvec = allocVector(INTSXP, nrs)); int *iv = INTEGER(rvec); for (int i = 0; i < nrs; i++) iv[i] = 1; // 1-based. s = coerceVector(s, INTSXP); int *ps = INTEGER(s); for (int i = 0; i < nrs; i++) { int tdim = 1; for (int j = 0; j < ndim; j++) { int k = ps[i + j * NR]; if(k == NA_INTEGER) {iv[i] = NA_INTEGER; break;} if(k < 0) { ECALL(call, _("negative values are not allowed in a matrix subscript")); } if(k == 0) {iv[i] = 0; break;} if (k > pdims[j]) { ECALL_OutOfBounds(x, j, k, call); } iv[i] += (k - 1) * tdim; tdim *= pdims[j]; } } } UNPROTECT(1); return rvec; } /* Special Matrix Subscripting: For the case x[i] where x is an n-way array and i is a character matrix with n columns, this code converts i to an integer matrix by matching against the dimnames of x. NA values in any row of i propagate to the result. Unmatched entries result in a subscript out of bounds error. */ /* Called from `[` VectorSubset() ./subset.c and `[<-` VectorSubassign() ./subassign.c only if(isMatrix(s) && isArray(x) && ncols(s) == "length(dim(x))"), where dnamelist = PROTECT(GetArrayDimnames(x)) */ SEXP attribute_hidden strmat2intmat(SEXP s, SEXP dnamelist, SEXP call, SEXP x) { /* XXX: assumes all args are protected */ SEXP dim = getAttrib(s, R_DimSymbol); int nr = INTEGER(dim)[0], // = nrow(s) nc = INTEGER(dim)[1]; // = ncol(s) if(isNull(dnamelist)) { // && nr > 0) #if 1 ECALL(call, _("no 'dimnames' attribute for array")); // as in arraySubscript() #else ECALL4(call, _("%d x %d character matrix subscript illegal for NULL dimnames"), nr, nc); #endif } #if 0 /* unneeded as only called if (length(dnamelist) == nc) : */ if(length(dnamelist) != nc) { ECALL4(call, _("subscripting by character matrix of %d columns does not match length(dimnames(x)) == %d"), nc, length(dnamelist)); } #endif SEXP snames = PROTECT(allocVector(STRSXP, nr)); // result si := an integer matrix of same dimension as 's' SEXP si = PROTECT(allocVector(INTSXP, xlength(s))); dimgets(si, dim); int *psi = INTEGER(si); memset(psi, 0, XLENGTH(si) * sizeof(int)); for (int i = 0; i < nc; i++) { R_xlen_t iNR = i * (R_xlen_t) nr; for (int j = 0; j < nr; j++) SET_STRING_ELT(snames, j, STRING_ELT(s, j + iNR)); SEXP dnames = VECTOR_ELT(dnamelist, i); SEXP sicol = PROTECT(match(dnames, snames, 0)); for (int j = 0; j < nr; j++) { int v = INTEGER_ELT(sicol, j); R_xlen_t idx = j + iNR; SEXP s_elt = STRING_ELT(s, idx); if (s_elt == NA_STRING) v = NA_INTEGER; else if (!CHAR(s_elt)[0]) v = 0; /* disallow "" match */ if (v == 0) ECALL_OutOfBoundsCHAR(x, i, s_elt, call); psi[idx] = v; } UNPROTECT(1); } UNPROTECT(2); return si; } static SEXP nullSubscript(R_xlen_t n) { SEXP indx; #ifdef LONG_VECTOR_SUPPORT if (n > R_SHORT_LEN_MAX) { indx = allocVector(REALSXP, n); double *pindx = REAL(indx); for (R_xlen_t i = 0; i < n; i++) pindx[i] = (double)(i + 1); } else #endif { indx = allocVector(INTSXP, n); int *pindx = INTEGER(indx); for (int i = 0; i < n; i++) pindx[i] = i + 1; } return indx; } static SEXP logicalSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call) { R_xlen_t count, i, nmax, i1, i2; int canstretch; SEXP indx; canstretch = *stretch > 0; if (!canstretch && ns > nx) { ECALL(call, _("(subscript) logical subscript too long")); } nmax = (ns > nx) ? ns : nx; *stretch = (ns > nx) ? ns : 0; if (ns == 0) return(allocVector(INTSXP, 0)); const int *ps = LOGICAL_RO(s); /* Calling LOCICAL_RO here may force a large allocation, but no larger than the one made by R_alloc below. This could use rewriting to better handle a sparse logical index. */ #ifdef LONG_VECTOR_SUPPORT if (nmax > R_SHORT_LEN_MAX) { if (ns == nmax) { /* no recycling - use fast single-index code */ const void *vmax = vmaxget(); double *buf = (double *) R_alloc(nmax, sizeof(double)); count = 0; R_ITERATE_CHECK(NINTERRUPT, nmax, i, if (ps[i]) { if (ps[i] == NA_LOGICAL) buf[count++] = NA_REAL; else buf[count++] = (double)(i + 1); }); PROTECT(indx = allocVector(REALSXP, count)); memcpy(REAL(indx), buf, sizeof(double) * count); vmaxset(vmax); UNPROTECT(1); return indx; } count = 0; /* we only need to scan s once even if we recycle, just remember the total count as well as the count for the last incomplete chunk (if any) */ i1 = (ns < nmax) ? (nmax % ns) : 0; if (i1 > 0) { /* last recycling chunk is incomplete - we have to get the truncated count as well */ R_xlen_t rem = 0; for (i = 0; i < ns; i++) { if (i == i1) rem = count; if (ps[i]) count++; } count = count * (nmax / ns) + rem; } else { /* nested recycling, total is sufficient */ for (i = 0; i < ns; i++) if (ps[i]) count++; count *= nmax / ns; } PROTECT(indx = allocVector(REALSXP, count)); double *pindx = REAL(indx); count = 0; MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2, if (ps[i1]) { if (ps[i1] == NA_LOGICAL) pindx[count++] = NA_REAL; else pindx[count++] = (double)(i + 1); }); UNPROTECT(1); return indx; } #endif // else --- the same code for non-long vectors -------------------------- if (ns == nmax) { /* no recycling - use fast single-index code */ const void *vmax = vmaxget(); int *buf = (int *) R_alloc(nmax, sizeof(int)); count = 0; R_ITERATE_CHECK(NINTERRUPT, nmax, i, if (ps[i]) { if (ps[i] == NA_LOGICAL) buf[count++] = NA_INTEGER; else buf[count++] = (int)(i + 1); }); PROTECT(indx = allocVector(INTSXP, count)); memcpy(INTEGER(indx), buf, sizeof(int) * count); vmaxset(vmax); UNPROTECT(1); return indx; } count = 0; /* we only need to scan s once even if we recycle, just remember the total count as well as the count for the last incomplete chunk (if any) */ i1 = (ns < nmax) ? (nmax % ns) : 0; if (i1 > 0) { /* last recycling chunk is incomplete - we have to get the truncated count as well */ R_xlen_t rem = 0; for (i = 0; i < ns; i++) { if (i == i1) rem = count; if (ps[i]) count++; } count = count * (nmax / ns) + rem; } else { /* nested recycling, total is sufficient */ for (i = 0; i < ns; i++) if (ps[i]) count++; count *= nmax / ns; } PROTECT(indx = allocVector(INTSXP, count)); int *pindx = INTEGER(indx); count = 0; MOD_ITERATE_CHECK(NINTERRUPT, nmax, ns, nmax, i, i1, i2, if (ps[i1]) { if (ps[i1] == NA_LOGICAL) pindx[count++] = NA_INTEGER; else pindx[count++] = (int)(i + 1); }); UNPROTECT(1); return indx; } static SEXP negativeSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP call) { SEXP indx; R_xlen_t stretch = 0; R_xlen_t i; PROTECT(indx = allocVector(LGLSXP, nx)); int *pindx = LOGICAL(indx); for (i = 0; i < nx; i++) pindx[i] = 1; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) { int ix = ps[i]; if (ix != 0 && ix != NA_INTEGER && -ix <= nx) pindx[-ix - 1] = 0; } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } static SEXP positiveSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx) { SEXP indx; R_xlen_t i, zct = 0; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) if (ps[i] == 0) zct++; if (zct) { indx = allocVector(INTSXP, (ns - zct)); int *pindx = INTEGER(indx); for (i = 0, zct = 0; i < ns; i++) if (ps[i] != 0) pindx[zct++] = ps[i]; return indx; } else return s; } static SEXP integerSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call, SEXP x) { R_xlen_t i; int ii, neg, max, canstretch; Rboolean isna = FALSE; canstretch = *stretch > 0; *stretch = 0; neg = FALSE; max = 0; const int *ps = INTEGER_RO(s); for (i = 0; i < ns; i++) { ii = ps[i]; if (ii < 0) { if (ii == NA_INTEGER) isna = TRUE; else neg = TRUE; } else if (ii > max) max = ii; } if (max > nx) { if(canstretch) *stretch = max; else { ECALL_OutOfBounds(x, -1, max, call); } } if (neg) { if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call); else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else return positiveSubscript(s, ns, nx); return R_NilValue; } static SEXP realSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, R_xlen_t *stretch, SEXP call, SEXP x) { int canstretch = *stretch > 0; *stretch = 0; double min = 0, max = 0; const double *ps = REAL_RO(s); Rboolean isna = FALSE; for (R_xlen_t i = 0; i < ns; i++) { double ii = ps[i]; if (R_FINITE(ii)) { if (ii < min) min = ii; if (ii > max) max = ii; } else isna = TRUE; } if (max >= nx+1.) { #ifndef LONG_VECTOR_SUPPORT if (max > INT_MAX) { ECALL(call, _("subscript too large for 32-bit R")); } #endif if(canstretch) *stretch = (R_xlen_t) max; else { ECALL_OutOfBounds(x, -1, (R_xlen_t) max, call); } } if (min <= -1) { if (max < 1 && !isna) { R_xlen_t i, stretch = 0; SEXP indx = PROTECT(allocVector(LGLSXP, nx)); int *pindx = LOGICAL(indx); for (i = 0; i < nx; i++) pindx[i] = 1; for (i = 0; i < ns; i++) { double dx = ps[i]; if (R_FINITE(dx) && dx <= -1 && -dx < nx+1.) { R_xlen_t ix = (R_xlen_t)(-dx - 1); pindx[ix] = 0; } } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else { /* Only return a REALSXP index if we need to */ SEXP indx; R_xlen_t i, cnt = 0; Rboolean int_ok = TRUE; /* NB, indices will be truncated eventually, so need to do that to take '0' into account */ for (i = 0; i < ns; i++) { double ds = ps[i]; #ifdef OLDCODE_LONG_VECTOR if (!R_FINITE(ds)) { if (ds > INT_MAX) int_ok = FALSE; cnt++; } else if ((R_xlen_t) ds != 0) cnt++; #else if (R_FINITE(ds) && ds > INT_MAX) int_ok = FALSE; if (!R_FINITE(ds) || (R_xlen_t) ds != 0) cnt++; #endif } if (int_ok) { indx = allocVector(INTSXP, cnt); int *pindx = INTEGER(indx); for (i = 0, cnt = 0; i < ns; i++) { double ds = ps[i]; int ia; if (!R_FINITE(ds)) ia = NA_INTEGER; else ia = (int) ds; if (ia != 0) pindx[cnt++] = ia; } } else { indx = allocVector(REALSXP, cnt); double *pindx = REAL(indx); for (i = 0, cnt = 0; i < ns; i++) { double ds = ps[i]; if (!R_FINITE(ds) || (R_xlen_t) ds != 0) pindx[cnt++] = ds; } } return indx; } return R_NilValue; } /* This uses a couple of horrible hacks in conjunction with * VectorAssign (in subassign.c). If subscripting is used for * assignment, it is possible to extend a vector by supplying new * names, and we want to give the extended vector those names, so they * are returned as the use.names attribute. Also, unset elements of the vector * of new names (places where a match was found) are indicated by * setting the element of the newnames vector to NULL. */ /* The original code (pre 2.0.0) used a ns x nx loop that was too * slow. So now we hash. Hashing is expensive on memory (up to 32nx * bytes) so it is only worth doing if ns * nx is large. If nx is * large, then it will be too slow unless ns is very small. */ static SEXP stringSubscript(SEXP s, R_xlen_t ns, R_xlen_t nx, SEXP names, R_xlen_t *stretch, SEXP call, SEXP x, int dim) { SEXP indx, indexnames = R_NilValue; R_xlen_t i, j, nnames, extra, sub; int canstretch = *stretch > 0; /* product may overflow, so check factors as well. */ Rboolean usehashing = ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) ); int nprotect = 0; PROTECT(s); PROTECT(names); nprotect += 2; nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ if(usehashing) { /* must be internal, so names contains a character vector */ /* NB: this does not behave in the same way with respect to "" and NA names: they will match */ PROTECT(indx = match(names, s, 0)); /**** guaranteed to be fresh???*/ nprotect++; /* second pass to correct this */ int *pindx = INTEGER(indx); for (i = 0; i < ns; i++) if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0]) pindx[i] = 0; } else { PROTECT(indx = allocVector(INTSXP, ns)); nprotect++; int *pindx = INTEGER(indx); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = STRING_ELT(names, j); if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; break; } } } pindx[i] = (int) sub; } } int *pindx = INTEGER(indx); SEXP sindx = NULL; for (i = 0; i < ns; i++) { sub = pindx[i]; if (sub == 0) { if (sindx == NULL) { sindx = PROTECT(match(s, s, 0)); indexnames = PROTECT(allocVector(VECSXP, ns)); nprotect += 2; for (int z = 0; z < ns; z++) SET_VECTOR_ELT(indexnames, z, R_NilValue); } int j = INTEGER(sindx)[i] - 1; if(STRING_ELT(s, i) != NA_STRING && CHAR(STRING_ELT(s, i))[0]) { sub = pindx[j]; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j)); } } if (sub == 0) { if (!canstretch) { ECALL_OutOfBoundsCHAR(x, dim, STRING_ELT(s, i), call); } extra += 1; sub = extra; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i)); } pindx[i] = (int) sub; } /* We return the new names as the names attribute of the returned subscript vector. */ if (extra != nnames) setAttrib(indx, R_UseNamesSymbol, indexnames); if (canstretch) *stretch = extra; UNPROTECT(nprotect); return indx; } /* Array Subscripts. dim is the dimension (0 to k-1) s is the subscript list, dims is the dimensions of x dng is a function (usually getAttrib) that obtains the dimnames x is the array to be subscripted. */ attribute_hidden SEXP int_arraySubscript(int dim, SEXP s, SEXP dims, SEXP x, SEXP call) { int nd, ns; R_xlen_t stretch = 0; SEXP dnames, tmp; ns = length(s); nd = INTEGER_ELT(dims, dim); switch (TYPEOF(s)) { case NILSXP: return allocVector(INTSXP, 0); case LGLSXP: return logicalSubscript(s, ns, nd, &stretch, call); case INTSXP: return integerSubscript(s, ns, nd, &stretch, call, x); case REALSXP: /* We don't yet allow subscripts > R_SHORT_LEN_MAX */ PROTECT(tmp = coerceVector(s, INTSXP)); tmp = integerSubscript(tmp, ns, nd, &stretch, call, x); UNPROTECT(1); return tmp; case STRSXP: dnames = getAttrib(x, R_DimNamesSymbol); if (dnames == R_NilValue) { ECALL(call, _("no 'dimnames' attribute for array")); } dnames = VECTOR_ELT(dnames, dim); return stringSubscript(s, ns, nd, dnames, &stretch, call, x, dim); case SYMSXP: if (s == R_MissingArg) return nullSubscript(nd); default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return R_NilValue; } /* This is used by packages arules, cba, proxy and seriation. */ typedef SEXP AttrGetter(SEXP x, SEXP data); typedef SEXP (*StringEltGetter)(SEXP x, int i); SEXP arraySubscript(int dim, SEXP s, SEXP dims, AttrGetter dng, StringEltGetter strg, SEXP x) { return int_arraySubscript(dim, s, dims, x, R_NilValue); } /* Subscript creation. The first thing we do is check to see */ /* if there are any user supplied NULL's, these result in */ /* returning a vector of length 0. */ /* if stretch is zero on entry then the vector x cannot be "stretched", otherwise, stretch returns the new required length for x */ SEXP attribute_hidden makeSubscript(SEXP x, SEXP s, R_xlen_t *stretch, SEXP call) { if (! (isVector(x) || isList(x) || isLanguage(x))) { ECALL(call, _("subscripting on non-vector")); } R_xlen_t nx = xlength(x); /* special case for simple indices -- does not duplicate */ if (IS_SCALAR(s, INTSXP)) { int i = SCALAR_IVAL(s); if (0 < i && i <= nx) { *stretch = 0; return s; } } else if (IS_SCALAR(s, REALSXP)) { double di = SCALAR_DVAL(s); if (1 <= di && di <= nx) { *stretch = 0; /* We could only return a REALSXP if the value is too large for an INTSXP, but, as the calling code can handle REALSXP indices, returning the REALSXP avoids an allocation. */ return s; } } R_xlen_t ns = xlength(s); SEXP ans = R_NilValue; switch (TYPEOF(s)) { case NILSXP: *stretch = 0; ans = allocVector(INTSXP, 0); break; case LGLSXP: ans = logicalSubscript(s, ns, nx, stretch, call); break; case INTSXP: ans = integerSubscript(s, ns, nx, stretch, call, x); break; case REALSXP: ans = realSubscript(s, ns, nx, stretch, call, x); break; case STRSXP: { SEXP names = PROTECT(getAttrib(x, R_NamesSymbol)); /* *stretch = 0; */ ans = stringSubscript(s, ns, nx, names, stretch, call, x, -1); UNPROTECT(1); /* names */ break; } case SYMSXP: *stretch = 0; if (s == R_MissingArg) { ans = nullSubscript(nx); break; } default: ECALL3(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return ans; }