diff --git a/ChangeLog.md b/ChangeLog.md index 1904f1d1db..614f5f4745 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -25,6 +25,7 @@ Bug fixes: * `stack init --solver` fails if `GHC_PACKAGE_PATH` is present [#860](https://github.com/commercialhaskell/stack/issues/860) * `stack solver` and `stack init --solver` check for test suite and benchmark dependencies [#862](https://github.com/commercialhaskell/stack/issues/862) +* More intelligent logic for setting UTF-8 locale environment variables (#856)[https://github.com/commercialhaskell/stack/issues/856] ## 0.1.3.1 diff --git a/src/Stack/Setup.hs b/src/Stack/Setup.hs index efc00afb67..f278c94655 100644 --- a/src/Stack/Setup.hs +++ b/src/Stack/Setup.hs @@ -200,6 +200,8 @@ setupEnv mResolveMissingGHC = do executablePath <- liftIO getExecutablePath + utf8EnvVars <- getUtf8LocaleVars menv + envRef <- liftIO $ newIORef Map.empty let getEnvOverride' es = do m <- readIORef envRef @@ -217,7 +219,7 @@ setupEnv mResolveMissingGHC = do else id) $ (if esLocaleUtf8 es - then Map.insert "LC_ALL" "C.UTF-8" + then Map.union utf8EnvVars else id) -- For reasoning and duplication, see: https://github.com/fpco/stack/issues/70 @@ -985,3 +987,137 @@ removeHaskellEnvVars = Map.delete "HASKELL_PACKAGE_SANDBOX" . Map.delete "HASKELL_PACKAGE_SANDBOXES" . Map.delete "HASKELL_DIST_DIR" + +-- | Get map of environment variables to set to change the locale's encoding to UTF-8 +getUtf8LocaleVars + :: forall m env. + (MonadReader env m, HasPlatform env, MonadLogger m, MonadCatch m, MonadBaseControl IO m, MonadIO m) + => EnvOverride -> m (Map Text Text) +getUtf8LocaleVars menv = do + Platform _ os <- asks getPlatform + if isWindows os + then + -- On Windows, locale is controlled by the code page, so we don't set any environment + -- variables. + return + Map.empty + else do + let checkedVars = map checkVar (Map.toList $ eoTextMap menv) + -- List of environment variables that will need to be updated to set UTF-8 (because + -- they currently do not specify UTF-8). + needChangeVars = concatMap fst checkedVars + -- Set of locale-related environment variables that have already have a value. + existingVarNames = Set.unions (map snd checkedVars) + -- True if a locale is already specified by one of the "global" locale variables. + hasAnyExisting = + or $ + map + (`Set.member` existingVarNames) + ["LANG", "LANGUAGE", "LC_ALL"] + if null needChangeVars && hasAnyExisting + then + -- If no variables need changes and at least one "global" variable is set, no + -- changes to environment need to be made. + return + Map.empty + else do + -- Get a list of known locales by running @locale -a@. + elocales <- tryProcessStdout Nothing menv "locale" ["-a"] + let + -- Filter the list to only include locales with UTF-8 encoding. + utf8Locales = + case elocales of + Left _ -> [] + Right locales -> + filter + isUtf8Locale + (T.lines $ + T.decodeUtf8With + T.lenientDecode + locales) + mfallback = getFallbackLocale utf8Locales + when + (isNothing mfallback) + ($logWarn + "Warning: unable to set locale to UTF-8 encoding; GHC may fail with 'invalid character'") + let + -- Get the new values of variables to adjust. + changes = + Map.unions $ + map + (adjustedVarValue utf8Locales mfallback) + needChangeVars + -- Get the values of variables to add. + adds + | hasAnyExisting = + -- If we already have a "global" variable, then nothing needs + -- to be added. + Map.empty + | otherwise = + -- If we don't already have a "global" variable, then set LANG to the + -- fallback. + case mfallback of + Nothing -> Map.empty + Just fallback -> + Map.singleton "LANG" fallback + return (Map.union changes adds) + where + -- Determines whether an environment variable is locale-related and, if so, whether it needs to + -- be adjusted. + checkVar + :: (Text, Text) -> ([Text], Set Text) + checkVar (k,v) = + if k `elem` ["LANG", "LANGUAGE"] || "LC_" `T.isPrefixOf` k + then if isUtf8Locale v + then ([], Set.singleton k) + else ([k], Set.singleton k) + else ([], Set.empty) + -- Adjusted value of an existing locale variable. Looks for valid UTF-8 encodings with + -- same language /and/ territory, then with same language, and finally the first UTF-8 locale + -- returned by @locale -a@. + adjustedVarValue + :: [Text] -> Maybe Text -> Text -> Map Text Text + adjustedVarValue utf8Locales mfallback k = + case Map.lookup k (eoTextMap menv) of + Nothing -> Map.empty + Just v -> + case concatMap + (matchingLocales utf8Locales) + [ T.takeWhile (/= '.') v <> "." + , T.takeWhile (/= '_') v <> "_"] of + (v':_) -> Map.singleton k v' + [] -> + case mfallback of + Just fallback -> Map.singleton k fallback + Nothing -> Map.empty + -- Determine the fallback locale, by looking for any UTF-8 locale prefixed with the list in + -- @fallbackPrefixes@, and if not found, picking the first UTF-8 encoding returned by @locale + -- -a@. + getFallbackLocale + :: [Text] -> Maybe Text + getFallbackLocale utf8Locales = do + case concatMap (matchingLocales utf8Locales) fallbackPrefixes of + (v:_) -> Just v + [] -> + case utf8Locales of + [] -> Nothing + (v:_) -> Just v + -- Filter the list of locales for any with the given prefixes (case-insitive). + matchingLocales + :: [Text] -> Text -> [Text] + matchingLocales utf8Locales prefix = + filter + (\v -> + (T.toLower prefix) `T.isPrefixOf` T.toLower v) + utf8Locales + -- Does the locale have one of the encodings in @utf8Suffixes@ (case-insensitive)? + isUtf8Locale locale = + or $ + map + (\v -> + T.toLower v `T.isSuffixOf` T.toLower locale) + utf8Suffixes + -- Prefixes of fallback locales (case-insensitive) + fallbackPrefixes = ["C.", "en_US.", "en_"] + -- Suffixes of UTF-8 locales (case-insensitive) + utf8Suffixes = [".UTF-8", ".utf8"]