Skip to content

add support for automatically parsing headers from the value during encode #57

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@ let package = Package(
products: [
.library(name: "CodableCSV", targets: ["CodableCSV"]),
],
dependencies: [],
dependencies: [
.package(url: "https://github.com/apple/swift-collections.git", from: "1.0.5")
],
targets: [
.target(name: "CodableCSV", dependencies: [], path: "sources"),
.target(name: "CodableCSV", dependencies: [
.product(name: "Collections", package: "swift-collections")
], path: "sources"),
.testTarget(name: "CodableCSVTests", dependencies: ["CodableCSV"], path: "tests"),
.testTarget(name: "CodableCSVBenchmarks", dependencies: ["CodableCSV"], path: "benchmarks")
]
Expand Down
199 changes: 199 additions & 0 deletions sources/Strategy.swift
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Foundation

/// The strategies to use when encoding/decoding.
public enum Strategy {
/// The strategy to allow/disable escaped fields and how.
Expand Down Expand Up @@ -35,3 +37,200 @@ public enum Strategy {
case convert(positiveInfinity: String, negativeInfinity: String, nan: String)
}
}


/// The strategy to use for automatically changing the value of keys before decoding.
/// - NOTE: sourced from: https://github.com/apple/swift-foundation/blob/9a9e3c15bb14020b69cf5b2f95694a257f329c41/Sources/FoundationEssentials/JSON/JSONDecoder.swift#L103
public enum KeyDecodingStrategy : Sendable {
    /// Use the keys specified by each type.
    case useDefaultKeys

    /// Convert from "snake_case_keys" to "camelCaseKeys" before attempting to match a key with the one specified by each type.
    ///
    /// Converting from snake case to camel case:
    /// 1. Lowercases the first word and capitalizes every word that follows a `_`.
    /// 2. Removes the `_` separators between words.
    /// 3. Preserves leading and trailing `_` (as these are often used to indicate private variables or other metadata).
    ///
    /// For example, `one_two_three` becomes `oneTwoThree`. `_one_two_three_` becomes `_oneTwoThree_`.
    /// A key without inner underscores is left untouched, since it may already be camel cased.
    ///
    /// - Note: Using a key decoding strategy has a nominal performance cost, as each string key has to be inspected for the `_` character.
    case convertFromSnakeCase

    /// Provide a custom conversion from the key found in the encoded data to the key specified by the decoded type.
    ///
    /// The closure receives only the key (as a `String`) and returns the key to use in its place.
    @preconcurrency
    case custom(@Sendable (_ key: String) -> String)

    /// Converts a snake-case key into its camel-case form, keeping any leading/trailing underscores.
    /// - parameter stringKey: The key to convert.
    /// - returns: The camel-cased key, or `stringKey` unchanged when it is empty or made only of underscores.
    static func _convertFromSnakeCase(_ stringKey: String) -> String {
        // An empty key and a key consisting solely of underscores have no word to convert.
        guard let coreStart = stringKey.firstIndex(where: { $0 != "_" }),
              let coreEnd = stringKey.lastIndex(where: { $0 != "_" }) else {
            return stringKey
        }

        // Split the key into `<leading underscores><core><trailing underscores>`.
        let leadingUnderscores = stringKey[..<coreStart]
        let core = stringKey[coreStart...coreEnd]
        let trailingUnderscores = stringKey[stringKey.index(after: coreEnd)...]

        let words = core.split(separator: "_")
        let camelCased: String
        if words.count == 1 {
            // A single word has no inner underscores; keep its casing as is.
            camelCased = String(core)
        } else {
            let head = words[0].lowercased()
            let tail = words.dropFirst().map { $0.capitalized }
            camelCased = head + tail.joined()
        }

        return String(leadingUnderscores) + camelCased + String(trailingUnderscores)
    }
}


/// The strategy to use for automatically changing the value of keys before encoding.
/// - NOTE: sourced from: `https://github.com/apple/swift-foundation/blob/9a9e3c15bb14020b69cf5b2f95694a257f329c41/Sources/FoundationEssentials/JSON/JSONEncoder.swift#L112`
public enum KeyEncodingStrategy : Sendable {
    /// Use the keys specified by each type.
    case useDefaultKeys

    /// Convert from "camelCaseKeys" to "snake_case_keys" before writing a key.
    ///
    /// Capital and lowercase characters are detected through `CharacterSet.uppercaseLetters` and `CharacterSet.lowercaseLetters`.
    ///
    /// Converting from camel case to snake case:
    /// 1. Splits words at the boundary of lower-case to upper-case.
    /// 2. Treats a run of more than one capital as its own word (`myURLProperty` becomes `my_url_property`).
    /// 3. Inserts `_` between words.
    /// 4. Lowercases the entire string.
    ///
    /// For example, `oneTwoThree` becomes `one_two_three`. `_oneTwoThree_` becomes `_one_two_three_`.
    ///
    /// - Note: Using a key encoding strategy has a nominal performance cost, as each string key has to be converted.
    case convertToSnakeCase

    /// Provide a custom conversion from the key specified by the encoded type to the key that gets written.
    ///
    /// The closure receives only the key (as a `String`) and returns the key to use in its place.
    @preconcurrency
    case custom(@Sendable (_ string: String) -> String)

    /// Converts a camel-case key into its snake-case form.
    /// - parameter stringKey: The key to convert.
    /// - returns: The lowercased words of `stringKey` joined by `_`; an empty key is returned unchanged.
    static func convertToSnakeCase(_ stringKey: String) -> String {
        if stringKey.isEmpty { return stringKey }

        // Words are split on the transition from lower to upper case, and on the transition
        // from a run of several capitals back to lower case:
        //
        //   myProperty    -> my_property
        //   myURLProperty -> my_url_property
        //
        // Per Swift naming conventions the first character is assumed to be lowercase,
        // hence the scan starts at the second character.
        let end = stringKey.endIndex
        var pieces: [String] = []
        var pieceStart = stringKey.startIndex
        var cursor = stringKey.index(after: pieceStart)

        // Each iteration jumps to the next capital letter at or after `cursor`.
        while let capital = stringKey.rangeOfCharacter(from: CharacterSet.uppercaseLetters, options: [], range: cursor..<end)?.lowerBound {
            // Everything gathered up to the capital is a finished word.
            pieces.append(stringKey[pieceStart..<capital].lowercased())

            // Look for the lowercase letter closing the capital (or the run of capitals).
            guard let lowercase = stringKey.rangeOfCharacter(from: CharacterSet.lowercaseLetters, options: [], range: capital..<end) else {
                // Only non-lowercase characters remain; they make up the last word.
                pieceStart = capital
                break
            }

            let afterCapital = stringKey.index(after: capital)
            if lowercase.lowerBound == afterCapital {
                // A lone capital followed by a lowercase letter: it starts the next word.
                pieceStart = capital
            } else {
                // A run of capitals: all but the last one form a word of their own,
                // and the last capital starts the word continued by the lowercase letter.
                let lastCapital = stringKey.index(before: lowercase.lowerBound)
                pieces.append(stringKey[capital..<lastCapital].lowercased())
                pieceStart = lastCapital
            }
            cursor = lowercase.upperBound
        }

        pieces.append(stringKey[pieceStart..<end].lowercased())
        return pieces.joined(separator: "_")
    }
}


/// The strategy to use for automatically encoding `TimeZone` values.
///
/// - NOTE(review): the behavior of the non-custom cases is implemented outside this declaration; the per-case notes below are inferred from the case names and must be confirmed against the encoder.
public enum TimeZoneEncodingStrategy : Sendable {
    /// Presumably writes the time zone's identifier (e.g. `TimeZone.identifier`) — confirm.
    case identifier
    /// Presumably writes the time zone's abbreviation — confirm.
    case abbreviation
    /// Presumably writes the time zone's offset from GMT in seconds — confirm.
    case secondsFromGMT

    /// Presumably writes the time zone using a JSON representation — confirm the exact format.
    case json

    /// Encode the `TimeZone` through the given closure. Any error thrown by the closure is propagated to the caller.
    ///
    /// The closure is `@Sendable` so that the enum's `Sendable` conformance is sound; `@preconcurrency` keeps
    /// existing call sites compiling, mirroring the `custom` cases of `KeyDecodingStrategy`/`KeyEncodingStrategy`.
    ///
    /// - parameter encoding: Function in charge of encoding the time zone.
    /// - parameter value: The time zone to encode.
    /// - parameter encoder: The encoder on which to write the custom representation.
    @preconcurrency
    case custom(_ encoding: @Sendable (_ value: TimeZone, _ encoder: Encoder) throws -> Void)
}

/// The strategy to use for encoding the header row.
public enum HeaderEncodingStrategy : Sendable {
/// Encodes the headers when they are provided; otherwise no header row is included.
case automatic
/// Tries to parse the headers from the properties of the value being encoded.
case parseFromValue
}


/// The strategy to use for decoding `TimeZone` values.
///
/// - NOTE(review): the behavior of the non-custom cases is implemented outside this declaration; the per-case notes below are inferred from the case names and must be confirmed against the decoder.
public enum TimeZoneDecodingStrategy : Sendable {
    /// Presumably reads the field as a time zone identifier — confirm.
    case identifier
    /// Presumably reads the field as a time zone abbreviation — confirm.
    case abbreviation
    /// Presumably reads the field as an offset from GMT in seconds — confirm.
    case secondsFromGMT

    /// Presumably reads the field as a JSON representation of the time zone — confirm the exact format.
    case json

    /// Decode the `TimeZone` as a custom value decoded by the given closure. If the closure fails to decode a value from the given decoder, the error will be bubbled up.
    ///
    /// Custom `TimeZone` decoding adheres to the same behavior as a custom `Decodable` type. For example:
    ///
    ///     let decoder = CSVDecoder()
    ///     decoder.timeZoneStrategy = .custom({
    ///         let container = try $0.singleValueContainer()
    ///         let string = try container.decode(String.self)
    ///         // Now return the time zone represented by the custom string or throw an error if the string cannot be converted to a time zone.
    ///     })
    ///
    /// The closure is `@Sendable` so that the enum's `Sendable` conformance is sound; `@preconcurrency` keeps
    /// existing call sites compiling, mirroring the `custom` cases of `KeyDecodingStrategy`/`KeyEncodingStrategy`.
    ///
    /// - parameter decoding: Function receiving the CSV decoder used to parse a custom `TimeZone` value.
    /// - parameter decoder: The decoder on which to fetch a single value container to obtain the underlying `String` value.
    /// - returns: `TimeZone` value decoded from the underlying storage.
    @preconcurrency
    case custom(_ decoding: @Sendable (_ decoder: Decoder) throws -> TimeZone)
}

14 changes: 7 additions & 7 deletions sources/declarative/decodable/Decoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extension CSVDecoder {
/// Returns a value of the type you specify, decoded from a CSV file (given as a `Data` blob).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter data: The data blob representing a CSV file.
open func decode<T:Decodable>(_ type: T.Type, from data: Data) throws -> T {
public func decode<T:Decodable>(_ type: T.Type, from data: Data) throws -> T {
let reader = try CSVReader(input: data, configuration: self._configuration.readerConfiguration)
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
Expand All @@ -44,7 +44,7 @@ extension CSVDecoder {
/// Returns a value of the type you specify, decoded from a CSV file (given as a `String`).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter string: A Swift string representing a CSV file.
open func decode<T:Decodable>(_ type: T.Type, from string: String) throws -> T {
public func decode<T:Decodable>(_ type: T.Type, from string: String) throws -> T {
let reader = try CSVReader(input: string, configuration: self._configuration.readerConfiguration)
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
Expand All @@ -54,7 +54,7 @@ extension CSVDecoder {
/// Returns a value of the type you specify, decoded from a CSV file (being pointed by the url).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter url: The URL pointing to the file to decode.
open func decode<T:Decodable>(_ type: T.Type, from url: URL) throws -> T {
public func decode<T:Decodable>(_ type: T.Type, from url: URL) throws -> T {
let reader = try CSVReader(input: url, configuration: self._configuration.readerConfiguration)
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
Expand All @@ -64,7 +64,7 @@ extension CSVDecoder {
/// Returns a value of the type you specify, decoded from a CSV file (provided by the input stream).
/// - parameter type: The type of the value to decode from the supplied file.
/// - parameter stream: The input stream providing the raw bytes.
open func decode<T:Decodable>(_ type: T.Type, from stream: InputStream) throws -> T {
public func decode<T:Decodable>(_ type: T.Type, from stream: InputStream) throws -> T {
let reader = try CSVReader(input: stream, configuration: self._configuration.readerConfiguration)
return try withExtendedLifetime(ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)) {
try T(from: ShadowDecoder(source: .passUnretained($0), codingPath: []))
Expand All @@ -76,7 +76,7 @@ extension CSVDecoder {
/// Returns a sequence for decoding row-by-row from a CSV file (given as a `Data` blob).
/// - parameter data: The data blob representing a CSV file.
/// - throws: `CSVError<CSVReader>` exclusively.
open func lazy(from data: Data) throws -> Lazy {
public func lazy(from data: Data) throws -> Lazy {
let reader = try CSVReader(input: data, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return Lazy(source: source)
Expand All @@ -85,7 +85,7 @@ extension CSVDecoder {
/// Returns a sequence for decoding row-by-row from a CSV file (given as a `String`).
/// - parameter string: A Swift string representing a CSV file.
/// - throws: `CSVError<CSVReader>` exclusively.
open func lazy(from string: String) throws -> Lazy {
public func lazy(from string: String) throws -> Lazy {
let reader = try CSVReader(input: string, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return Lazy(source: source)
Expand All @@ -94,7 +94,7 @@ extension CSVDecoder {
/// Returns a sequence for decoding row-by-row from a CSV file (being pointed by `url`).
/// - parameter url: The URL pointing to the file to decode.
/// - throws: `CSVError<CSVReader>` exclusively.
open func lazy(from url: URL) throws -> Lazy {
public func lazy(from url: URL) throws -> Lazy {
let reader = try CSVReader(input: url, configuration: self._configuration.readerConfiguration)
let source = ShadowDecoder.Source(reader: reader, configuration: self._configuration, userInfo: self.userInfo)
return Lazy(source: source)
Expand Down
3 changes: 3 additions & 0 deletions sources/declarative/decodable/DecoderConfiguration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ extension CSVDecoder {
public var dataStrategy: Strategy.DataDecoding
/// The amount of CSV rows kept in memory after decoding to allow the random-order jumping exposed by keyed containers.
public var bufferingStrategy: Strategy.DecodingBuffer
/// The strategy to use when decoding `TimeZone` values.
public var timeZoneStrategy: TimeZoneDecodingStrategy

/// Designated initializer setting the default values.
public init() {
Expand All @@ -30,6 +32,7 @@ extension CSVDecoder {
self.dateStrategy = .deferredToDate
self.dataStrategy = .base64
self.bufferingStrategy = .keepAll
self.timeZoneStrategy = .identifier
}

/// Gives direct access to all CSV reader's configuration values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ extension ShadowDecoder.KeyedContainer {
func decode<T>(_ type: T.Type, forKey key: Key) throws -> T where T:Decodable {
if T.self == Date.self {
return try self._fieldContainer(forKey: key).decode(Date.self) as! T
} else if T.self == TimeZone.self {
return try self._fieldContainer(forKey: key).decode(TimeZone.self) as! T
} else if T.self == Data.self {
return try self._fieldContainer(forKey: key).decode(Data.self) as! T
} else if T.self == Decimal.self {
Expand Down
Loading