Expose u32 source offsets as usize

Computations should use usize, so this makes things more convenient.
Post-FFI we can make SourceRange fields private, to enforce this even more easily.
This commit is contained in:
Johannes Altmanninger 2023-04-18 16:57:17 +02:00
parent 2ca27d2c5b
commit fc5e97e55e
4 changed files with 84 additions and 28 deletions

View File

@@ -141,8 +141,7 @@ pub trait Node: Acceptor + ConcreteNode + std::fmt::Debug {
/// \return the source code for this node, or none if unsourced.
fn try_source<'s>(&self, orig: &'s wstr) -> Option<&'s wstr> {
self.try_source_range()
.map(|r| &orig[r.start as usize..r.end() as usize])
self.try_source_range().map(|r| &orig[r.start()..r.end()])
}
/// \return the source code for this node, or an empty string if unsourced.
@@ -2506,16 +2505,16 @@ impl<'a> TokenStream<'a> {
result.may_be_variable_assignment = variable_assignment_equals_pos(text).is_some();
result.tok_error = token.error;
assert!(token.offset < SOURCE_OFFSET_INVALID);
result.source_start = token.offset;
result.source_length = token.length;
assert!(token.offset() < SOURCE_OFFSET_INVALID);
result.set_source_start(token.offset());
result.set_source_length(token.length());
if token.error != TokenizerError::none {
let subtoken_offset = token.error_offset_within_token;
let subtoken_offset = token.error_offset_within_token();
// Skip invalid tokens that have a zero length, especially if they are at EOF.
if subtoken_offset < result.source_length {
result.source_start += subtoken_offset;
result.source_length = token.error_length;
if subtoken_offset < result.source_length() {
result.set_source_start(result.source_start() + subtoken_offset);
result.set_source_length(token.error_length());
}
}
@@ -2584,7 +2583,7 @@ macro_rules! parse_error_range {
FLOG!(ast_construction, "%*sparse error - begin unwinding", $self.spaces(), "");
// TODO: can store this conditionally dependent on flags.
if $range.start != SOURCE_OFFSET_INVALID {
if $range.start() != SOURCE_OFFSET_INVALID {
$self.errors.push($range);
}
@@ -2592,8 +2591,8 @@ macro_rules! parse_error_range {
let mut err = ParseError::default();
err.text = text.unwrap();
err.code = $code;
err.source_start = $range.start as usize;
err.source_length = $range.length as usize;
err.source_start = $range.start();
err.source_length = $range.length();
errors.0.push(err);
}
}
@@ -3384,8 +3383,8 @@ impl<'s> Populator<'s> {
"%*schomping range %u-%u",
self.spaces(),
"",
tok.source_start,
tok.source_length
tok.source_start(),
tok.source_length()
);
}
FLOG!(ast_construction, "%*sdone unwinding", self.spaces(), "");

View File

@@ -12,7 +12,7 @@ use widestring_suffix::widestrs;
pub type SourceOffset = u32;
pub const SOURCE_OFFSET_INVALID: SourceOffset = SourceOffset::MAX;
pub const SOURCE_OFFSET_INVALID: usize = SourceOffset::MAX as _;
pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX;
#[derive(Copy, Clone)]
@@ -85,8 +85,10 @@ mod parse_constants_ffi {
}
extern "Rust" {
fn end(self: &SourceRange) -> u32;
fn contains_inclusive(self: &SourceRange, loc: u32) -> bool;
#[cxx_name = "end"]
fn end_ffi(self: &SourceRange) -> u32;
#[cxx_name = "contains_inclusive"]
fn contains_inclusive_ffi(self: &SourceRange, loc: u32) -> bool;
}
/// IMPORTANT: If the following enum table is modified you must also update token_type_description below.
@@ -245,15 +247,34 @@ pub use parse_constants_ffi::{
};
impl SourceRange {
pub fn new(start: SourceOffset, length: SourceOffset) -> Self {
SourceRange { start, length }
pub fn new(start: usize, length: usize) -> Self {
SourceRange {
start: start.try_into().unwrap(),
length: length.try_into().unwrap(),
}
}
pub fn end(&self) -> SourceOffset {
pub fn start(&self) -> usize {
self.start.try_into().unwrap()
}
pub fn length(&self) -> usize {
self.length.try_into().unwrap()
}
pub fn end(&self) -> usize {
self.start
.checked_add(self.length)
.expect("Overflow")
.try_into()
.unwrap()
}
fn end_ffi(&self) -> u32 {
self.start.checked_add(self.length).expect("Overflow")
}
// \return true if a location is in this range, including one-past-the-end.
pub fn contains_inclusive(&self, loc: SourceOffset) -> bool {
pub fn contains_inclusive(&self, loc: usize) -> bool {
self.start() <= loc && loc - self.start() <= self.length()
}
fn contains_inclusive_ffi(&self, loc: u32) -> bool {
self.start <= loc && loc - self.start <= self.length
}
}

View File

@@ -32,8 +32,8 @@ pub struct ParseToken {
pub may_be_variable_assignment: bool,
/// If this is a tokenizer error, that error.
pub tok_error: TokenizerError,
pub source_start: SourceOffset,
pub source_length: SourceOffset,
source_start: SourceOffset,
source_length: SourceOffset,
}
impl ParseToken {
@@ -46,14 +46,26 @@ impl ParseToken {
is_newline: false,
may_be_variable_assignment: false,
tok_error: TokenizerError::none,
source_start: SOURCE_OFFSET_INVALID,
source_start: SOURCE_OFFSET_INVALID.try_into().unwrap(),
source_length: 0,
}
}
pub fn set_source_start(&mut self, value: usize) {
self.source_start = value.try_into().unwrap();
}
pub fn source_start(&self) -> usize {
self.source_start.try_into().unwrap()
}
pub fn set_source_length(&mut self, value: usize) {
self.source_length = value.try_into().unwrap();
}
pub fn source_length(&self) -> usize {
self.source_length.try_into().unwrap()
}
/// \return the source range.
/// Note the start may be invalid.
pub fn range(&self) -> SourceRange {
SourceRange::new(self.source_start, self.source_length)
SourceRange::new(self.source_start(), self.source_length())
}
/// \return whether we are a string with the dash prefix set.
pub fn is_dash_prefix_string(&self) -> bool {

View File

@@ -269,7 +269,7 @@ impl Tok {
Tok {
offset: 0,
length: 0,
error_offset_within_token: SOURCE_OFFSET_INVALID,
error_offset_within_token: SOURCE_OFFSET_INVALID.try_into().unwrap(),
error_length: 0,
error: TokenizerError::none,
type_: r#type,
@@ -285,6 +285,30 @@ impl Tok {
fn get_source_ffi(self: &Tok, str: &CxxWString) -> UniquePtr<CxxWString> {
self.get_source(str.as_wstr()).to_ffi()
}
pub fn set_offset(&mut self, value: usize) {
self.offset = value.try_into().unwrap();
}
pub fn offset(&self) -> usize {
self.offset.try_into().unwrap()
}
pub fn length(&self) -> usize {
self.length.try_into().unwrap()
}
pub fn set_length(&mut self, value: usize) {
self.length = value.try_into().unwrap();
}
pub fn set_error_offset_within_token(&mut self, value: usize) {
self.error_offset_within_token = value.try_into().unwrap();
}
pub fn error_offset_within_token(&self) -> usize {
self.error_offset_within_token.try_into().unwrap()
}
pub fn error_length(&self) -> usize {
self.error_length.try_into().unwrap()
}
pub fn set_error_length(&mut self, value: usize) {
self.error_length = value.try_into().unwrap();
}
}
/// The tokenizer struct.
@@ -818,8 +842,8 @@ impl Tokenizer {
}
let mut result = Tok::new(TokenType::string);
result.offset = buff_start as u32;
result.length = (self.token_cursor - buff_start) as u32;
result.set_offset(buff_start);
result.set_length(self.token_cursor - buff_start);
result
}
}