Search code examples
rlevels

Is there a limit to the number of levels in R?


I am wondering whether there is a limit on the number of levels a factor can have.

I am trying to restructure some curricula vitae (CVs) from Xing. There are around 135 selectable industries. My code looks like this (shortened here; my actual code lists all 135 industries):

# Industry labels (shortened here; the actual vector has around 135 entries).
companyIndustryLevels <- c(
  "",
  "ACADEMIA",
  "ACCOUNTING",
  "AEROSPACE"
)

# Assign the labels as the levels of the Industry column of the first sample.
levels(samples[[1]]$Industry) <- companyIndustryLevels

The following combinations work fine and are selectable when filtering the list.

# Gender labels, assigned as the levels of the Gender column.
genderLevels <- c("M", "F")
levels(samples[[1]]$Gender) <- genderLevels

# Company-size labels (starting with an empty label), assigned as the levels
# of the CompanySize column.
companySizeLevels <- c(
  "", "1", "1-10", "11-50", "51-200", "201-500",
  "501-1000", "1001-5000", "5001-10000", "10001+"
)
levels(samples[[1]]$CompanySize) <- companySizeLevels

So the problem is that, when viewing the list, the industry column only shows a factor with 1 level instead of 135 levels.

EDIT: I am using RStudio version 1.1.383 and R version 3.4.3. As you can see in the reproducible example below, the other columns such as "Gender", "Beschäftigungsart", "Position", and "Unternehmensgroesse" also have levels. When using the filter in RStudio's View window, I can filter every column by its levels, except the "Industrie" column.

# Reproducible example: a one-row data.frame written out as a structure()
# literal (this looks like dput() output) and opened with View().
# Every factor column is spelled out as an integer code plus its label set
# (.Label) and class "factor".
View(structure(
  list(
    ID = 1,
    Gender = structure(1L, .Label = c("M",
                                      "F"), class = "factor"),
    # The next three factors each have a single level: the empty string.
    Bildungseinrichtungen = structure(1L, .Label = "", class = "factor"),
    Abschluss = structure(1L, .Label = "", class = "factor"),
    Studienfach = structure(1L, .Label = "", class = "factor"),
    # Factor with 9 levels; the value is code 1, i.e. the empty label.
    Beschäftigungsart = structure(
      1L,
      .Label = c(
        "",
        "FULL_TIME_EMPLOYEE",
        "PART_TIME_EMPLOYEE",
        "INTERN",
        "FREELANCER",
        "OWNER",
        "PARTNER",
        "BOARD_MEMBER",
        "VOLUNTEER"
      ),
      class = "factor"
    ),
    # Plain NA columns (not factors).
    Station.Start = NA,
    Station.Ende = NA,
    Bezeichnung = NA,
    # Factor with 7 levels; the value is code 1, i.e. the empty label.
    Position = structure(
      1L,
      .Label = c(
        "",
        "STUDENT_INTERN",
        "ENTRY_LEVEL",
        "PROFESSIONAL_EXPERIENCED",
        "MANAGER_SUPERVISOR",
        "EXECUTIVE",
        "SENIOR_EXECUTIVE"
      ),
      class = "factor"
    ),
    Unternehmen = structure(1L, .Label = "AMA", class = "factor"),
    # Factor with 10 levels; the value is code 1, i.e. the empty label.
    Unternehmensgroesse = structure(
      1L,
      .Label = c(
        "",
        "1",
        "1-10",
        "11-50",
        "51-200",
        "201-500",
        "501-1000",
        "1001-5000",
        "5001-10000",
        "10001+"
      ),
      class = "factor"
    ),
    # Industry factor: 134 labels are listed here, far more than for any other
    # column. Unlike the other factors there is no empty label, so code 1 is
    # "ACADEMIA". This is the column the question is about.
    Industrie = structure(
      1L,
      .Label = c(
        "ACADEMIA",
        "ACCOUNTING",
        "AEROSPACE",
        "AGRICULTURE",
        "AIRLINES",
        "ALTERNATIVE_MEDICINE",
        "APPAREL_AND_FASHION",
        "ARCHITECTURE_AND_PLANNING",
        "ARTS_AND_CRAFTS",
        "AUTOMOTIVE",
        "BANKING",
        "BIOTECHNOLOGY",
        "BROADCAST_MEDIA",
        "BUILDING_MATERIALS",
        "BUSINESS_SUPPLIES_AND_EQUIPMENT",
        "CHEMICALS",
        "CIVIC_AND_SOCIAL_ORGANIZATIONS",
        "CIVIL_ENGINEERING",
        "CIVIL_SERVICE",
        "COMPOSITES",
        "COMPUTER_AND_NETWORK_SECURITY",
        "COMPUTER_GAMES",
        "COMPUTER_HARDWARE",
        "COMPUTER_NETWORKING",
        "COMPUTER_SOFTWARE",
        "CONSTRUCTION",
        "CONSULTING",
        "CONSUMER_ELECTRONICS",
        "CONSUMER_GOODS",
        "CONSUMER_SERVICES",
        "COSMETICS",
        "DAYCARE",
        "DEFENSE_MILITARY",
        "DESIGN",
        "EDUCATION",
        "ELEARNING",
        "ELECTRICAL_ENGINEERING",
        "ENERGY",
        "ENTERTAINMENT",
        "ENVIRONMENTAL_SERVICES",
        "EVENTS_SERVICES",
        "FACILITIES_SERVICES",
        "FACILITY_MANAGEMENT",
        "FINANCIAL_SERVICES",
        "FISHERY",
        "FOOD",
        "FUNDRAISING",
        "FURNITURE",
        "GARDENING_LANDSCAPING",
        "GEOLOGY",
        "GLASS_AND_CERAMICS",
        "GRAPHIC_DESIGN",
        "HEALTH_AND_FITNESS",
        "HOSPITALITY",
        "HUMAN_RESOURCES",
        "IMPORT_AND_EXPORT",
        "INDUSTRIAL_AUTOMATION",
        "INFORMATION_SERVICES",
        "INFORMATION_TECHNOLOGY_AND_SERVICES",
        "INSURANCE",
        "INTERNATIONAL_AFFAIRS",
        "INTERNATIONAL_TRADE_AND_DEVELOPMENT",
        "INTERNET",
        "INVESTMENT_BANKING",
        "JOURNALISM",
        "LEGAL_SERVICES",
        "LEISURE_TRAVEL_AND_TOURISM",
        "LIBRARIES",
        "LOGISTICS_AND_SUPPLY_CHAIN",
        "LUXURY_GOODS_AND_JEWELRY",
        "MACHINERY",
        "MANAGEMENT_CONSULTING",
        "MARITIME",
        "MARKETING_AND_ADVERTISING",
        "MARKET_RESEARCH",
        "MECHANICAL_INDUSTRIAL_ENGINEERING",
        "MEDIA_PRODUCTION",
        "MEDICAL_DEVICES",
        "MEDICAL_SERVICES",
        "MEDICINAL_PRODUCTS",
        "METAL_METALWORKING",
        "METROLOGY_CONTROL_ENGINEERING",
        "MINING_AND_METALS",
        "MOTION_PICTURES",
        "MUSEUMS_AND_CULTURAL_INSTITUTIONS",
        "MUSIC",
        "NANOTECHNOLOGY",
        "NON_PROFIT_ORGANIZATION",
        "NURSING_AND_PERSONAL_CARE",
        "OIL_AND_ENERGY",
        "ONLINE_MEDIA",
        "OTHERS",
        "OUTSOURCING_OFFSHORING",
        "PACKAGING_AND_CONTAINERS",
        "PAPER_AND_FOREST_PRODUCTS",
        "PHOTOGRAPHY",
        "PLASTICS",
        "POLITICS",
        "PRINTING",
        "PRINT_MEDIA",
        "PROCESS_MANAGEMENT",
        "PROFESSIONAL_TRAINING_AND_COACHING",
        "PSYCHOLOGY_PSYCHOTHERAPY",
        "PUBLIC_HEALTH",
        "PUBLIC_RELATIONS_AND_COMMUNICATIONS",
        "PUBLISHING",
        "RAILROAD",
        "REAL_ESTATE",
        "RECREATIONAL_FACILITIES_AND_SERVICES",
        "RECYCLING_AND_WASTE_MANAGEMENT",
        "RENEWABLES_AND_ENVIRONMENT",
        "RESEARCH",
        "RESTAURANTS_AND_FOOD_SERVICE",
        "RETAIL",
        "SECURITY_AND_INVESTIGATIONS",
        "SEMICONDUCTORS",
        "SHIPBUILDING",
        "SPORTS",
        "STAFFING_AND_RECRUITING",
        "TAX_ACCOUNTANCY_AUDITING",
        "TELECOMMUNICATION",
        "TEXTILES",
        "THEATER_STAGE_CINEMA",
        "TIMBER",
        "TRAFFIC_ENGINEERING",
        "TRANSLATION_AND_LOCALIZATION",
        "TRANSPORT",
        "VENTURE_CAPITAL_AND_PRIVATE_EQUITY",
        "VETERINARY",
        "WELFARE_AND_COMMUNITY_HEALTH",
        "WHOLESALE",
        "WINE_AND_SPIRITS",
        "WRITING_AND_EDITING",
        "PHARMACEUTICALS"
      ),
      class = "factor"
    )
  ),
  # Column names, in the same order as the list elements above.
  .Names = c(
    "ID",
    "Gender",
    "Bildungseinrichtungen",
    "Abschluss",
    "Studienfach",
    "Beschäftigungsart",
    "Station.Start",
    "Station.Ende",
    "Bezeichnung",
    "Position",
    "Unternehmen",
    "Unternehmensgroesse",
    "Industrie"
  ),
  # A single row; the class attribute makes the list a data.frame.
  row.names = 1L,
  class = "data.frame"
))

Solution

  • It seems that the filter option in RStudio's Data Viewer (View()) offers a drop-down menu for a factor only when its number of levels (nlevels()) is less than 65. Otherwise it falls back to a search field:

    # 64 levels: "filter" yields a drop down menu
    df <- data.frame(x = as.factor(seq_len(64)))
    View(df)
    
    # 65 levels: "filter" yields a search field
    df <- data.frame(x = as.factor(seq_len(65)))
    View(df)
    
    # RStudio version used for the observations above
    RStudio.Version()$version
    # [1] ‘1.0.143’
    

    Note that this has nothing to do with R itself, as already mentioned in the comments.